Jan Oosting
Zobrazit profil   Přeložit do jazyka: čeština Přeloženo (zobrazit originál)
	 Další možnosti 31 pro 2007, 10:31
I created a couple of units for Delphi that use the tessdll. The first
one basically translates the tessdll.h file, the second unit defines a
class that can take a bitmap, and OCR it.
There is no proper documentation (yet), so you'll have to have a look
at the source to be able to use them. The comments show some of the issues
that had to be overcome in order to use tessdll from Delphi.

Jan
==TESSDLL.PAS======================================================

unit TESSDLL;
{**************************************************************************}
{                                                                          }
{    This C DLL header file first (automatic) conversion generated by:     }
{    HeadConv 4.0 (c) 2000 by Bob Swart (aka Dr.Bob - www.drbob42.com)     }
{      Final Delphi-Jedi (Darth) command-line units edition                }
{                                                                          }
{    Generated Date: 02-08-2007                                            }
{    Generated Time: 11:50:26                                              }
{                                                                          }
{**************************************************************************}
interface
uses
  Windows;
{//////////////////////////////////////////////////////////////////////// }
{/// File: tessdll.h }
{/// Description: Windows dll interface for Tesseract. }
{/// Author: Glen Wernersbach }
{/// Created: Tue May 15 10:30:01 PDT 2007 }
{/// }
{/// (C) Copyright 2007, Jetsoftdev. }
{/// Licensed under the Apache License, Version 2.0 (the "License"); }
{/// you may not use this file except in compliance with the License. }
{/// You may obtain a copy of the License at }
{/// http://www.apache.org/licenses/LICENSE-2.0 }
{/// Unless required by applicable law or agreed to in writing, software }
{/// distributed under the License is distributed on an "AS IS" BASIS, }
{/// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. }
{/// See the License for the specific language governing permissions and }
{/// limitations under the License. }
{/// }
{    Delphi translation: J. Oosting }
{//////////////////////////////////////////////////////////////////////// }

type

{ One character cell of OCR output.

  The format of char_code for version 2.0 and beyond is UTF-8. ASCII
  characters come out as one structure, but other characters are returned in
  two or more instances of this structure with a single byte of the UTF-8
  code in each; each of those instances has the same bounding box.
  Programs that want to handle languages with different character sets need
  to handle extended characters appropriately, but *all* code needs to be
  prepared to receive UTF-8 coded characters for characters such as bullets
  and fancy quotes.

  The record is packed: 2 + 5*2 + 4*1 = 16 bytes. }
TEANYCODE_CHAR = packed record
  char_code: Word;        // character itself
  left: SmallInt;         // of char (-1)
  right: SmallInt;        // of char (-1)
  top: SmallInt;          // of char (-1)
  bottom: SmallInt;       // of char (-1)
  font_index: SmallInt;   // what font (0)
  confidence: Byte;       // 0 = perfect, 100 = reject (0/100)
  point_size: Byte;       // of char, 72 = 1 inch, (10)
  blanks: ShortInt;       // number of spaces before this char (1)
  formatting: Byte;       // char formatting (0)
end;
PEANYCODE_CHAR = ^TEANYCODE_CHAR;

{/**********************************************************************
 * ETEXT_DESC
 * Description of the output of the OCR engine.
 * This structure is used as both a progress monitor and the final
 * output header, since it needs to be a valid progress monitor while
 * the OCR engine is storing its output to shared memory.
 * During progress, all the buffer info is -1.
 * Progress starts at 0 and increases to 100 during OCR. No other constraint.
 * Every progress callback, the OCR engine must set ocr_alive to 1.
 * The HP side will set ocr_alive to 0. Repeated failure to reset
 * to 1 indicates that the OCR engine is dead.
 * If the cancel function is not null then it is called with the number of
 * user words found. If it returns true then operation is cancelled.
 **********************************************************************/
typedef bool (*CANCEL_FUNC)(void* cancel_this, int words);}

{ Output header of the OCR engine, followed by the recognised characters.
  NOTE(review): unlike TEANYCODE_CHAR this record is not declared packed;
  presumably it relies on the compiler's default field alignment to match
  the layout used by the DLL - confirm before changing alignment settings. }
TETEXT_DESC = record
  count: SmallInt;          // chars in this buffer (0)
  progress: SmallInt;       // percent complete, increasing (0-100)
  more_to_come: ShortInt;   // true if not last
  ocr_alive: ShortInt;      // ocr sets to 1, HP 0
  err_code: ShortInt;       // for errcode use
  cancel: Pointer;          // returns true to cancel
  cancel_this: Pointer;     // this or other data for cancel
  end_time: LongInt;        // time to stop if not 0
  text: array[0..31999] of TEANYCODE_CHAR;   // character data
end;
PETEXT_DESC = ^TETEXT_DESC;

{///The functions below provide a c wrapper to a global recognize class object }

{///xsize should be the width of line in bytes times 8 }
{///ysize is the height }
{///pass through a buffer of bytes for a 1 bit per pixel bitmap }
{///BeginPage assumes the first memory address is the bottom of the
image (MS DIB format) }
{///BeginPageUpright assumes the first memory address is the top of
the image (TIFF format) }
{///lang is the code of the language for which the data will be loaded. }
{///(Codes follow ISO 639-2.) If it is NULL, english (eng) will be loaded. }
{ Entry points of TESSDLL.DLL, held as procedural variables. They are filled
  in by LoadTessDLL; check TessDLLLoaded before calling any of them.

  NOTE(review): "lang" is declared as PChar. The header comment describes it
  as a language code string passed to a C wrapper; PChar only denotes a
  single-byte string on compilers where PChar = PAnsiChar - confirm before
  building with a Unicode Delphi version. }
var
  { --- page set-up, 1 bit per pixel ------------------------------------- }
  TessDllBeginPage: function(xsize: Cardinal;
                             ysize: Cardinal;
                             buf: Pointer): LongInt; cdecl;

  TessDllBeginPageLang: function(xsize: Cardinal;
                                 ysize: Cardinal;
                                 buf: Pointer;
                                 const lang: PChar): LongInt; cdecl;

  TessDllBeginPageUpright: function(xsize: Cardinal;
                                    ysize: Cardinal;
                                    buf: Pointer;
                                    const lang: PChar): LongInt; cdecl;

  { --- page set-up with explicit pixel depth (added in version 2.0) ------
    bpp is 1 for a binary bitmap, 8 for gray, 24 for colour RGB. }
  TessDllBeginPageBPP: function(xsize: Cardinal;
                                ysize: Cardinal;
                                buf: Pointer;
                                bpp: Byte): LongInt; cdecl;

  TessDllBeginPageLangBPP: function(xsize: Cardinal;
                                    ysize: Cardinal;
                                    buf: Pointer;
                                    const lang: PChar;
                                    bpp: Byte): LongInt; cdecl;

  TessDllBeginPageUprightBPP: function(xsize: Cardinal;
                                       ysize: Cardinal;
                                       buf: Pointer;
                                       const lang: PChar;
                                       bpp: Byte): LongInt; cdecl;

  TessDllEndPage: function: LongInt; cdecl;

  { --- recognition -------------------------------------------------------
    Extracts one word or section from the bitmap, or the whole page.
    To extract the whole page just enter zeros for left, right, top, bottom.
    Note: getting one word at a time is not yet optimized for speed.
    A limit of 32000 characters can be returned.
    See ocrclass.h for a description of the ETEXT_DESC structure. }
  TessDllRecognize_a_Block: function(left: Cardinal;
                                     right: Cardinal;
                                     top: Cardinal;
                                     bottom: Cardinal): PETEXT_DESC; cdecl;

  TessDllRecognize_all_Words: function: PETEXT_DESC; cdecl;

  { Releases any memory associated with the recognize class object. }
  TessDllRelease: function: LongInt; cdecl;

  { True once LoadTessDLL has loaded the library. }
  TessDLLLoaded: Boolean = False;

implementation

var
  SaveExit: pointer;
  DLLHandle: THandle;
  ErrorMode: Integer;

  { Exit handler installed by LoadTessDLL: re-links the previously installed
    exit procedure, lets the DLL release its recognizer object and unloads
    the library.
    TessDllRelease is only called when its address was actually resolved;
    with assertions compiled out a missing export would otherwise be invoked
    through a nil pointer during program shutdown. }
  procedure NewExit;
  begin
    ExitProc := SaveExit;
    if Assigned(TessDllRelease) then
      TessDllRelease;
    FreeLibrary(DLLHandle)
  end {NewExit};

{ Loads TESSDLL.DLL and binds every TessDll* procedural variable.

  TessDLLLoaded becomes True only when the library was loaded AND every
  export was found. Export look-ups are checked explicitly instead of with
  Assert, because assertions disappear when compiled with $C- and would then
  leave nil entry points behind a "loaded" flag. If an export is missing the
  library is unloaded again and TessDLLLoaded stays False (the procedural
  variables must not be used in that case).

  The system error-mode is restored in a finally block so that it cannot
  leak when anything in between raises. Calling the procedure again after a
  successful load is a no-op. }
procedure LoadTessDLL;
var
  AllResolved: Boolean;

  { Looks up one export; records a failure in AllResolved. }
  function Resolve(ProcName: PAnsiChar): Pointer;
  begin
    Result := GetProcAddress(DLLHandle, ProcName);
    if Result = nil then
      AllResolved := False;
  end;

begin
  if TessDLLLoaded then Exit;
  ErrorMode := SetErrorMode($8000{SEM_NoOpenFileErrorBox});
  try
    DLLHandle := LoadLibrary('TESSDLL.DLL');
    // Win32 LoadLibrary reports failure with a zero handle
    if DLLHandle = 0 then
    begin
      TessDLLLoaded := False;
      { Error: TESSDLL.DLL could not be loaded !! }
      Exit;
    end;

    AllResolved := True;
    @TessDllBeginPage := Resolve('TessDllBeginPage');
    @TessDllBeginPageLang := Resolve('TessDllBeginPageLang');
    @TessDllBeginPageUpright := Resolve('TessDllBeginPageUpright');
    @TessDllBeginPageBPP := Resolve('TessDllBeginPageBPP');
    @TessDllBeginPageLangBPP := Resolve('TessDllBeginPageLangBPP');
    @TessDllBeginPageUprightBPP := Resolve('TessDllBeginPageUprightBPP');
    @TessDllEndPage := Resolve('TessDllEndPage');
    @TessDllRecognize_a_Block := Resolve('TessDllRecognize_a_Block');
    @TessDllRecognize_all_Words := Resolve('TessDllRecognize_all_Words');
    @TessDllRelease := Resolve('TessDllRelease');

    if AllResolved then
    begin
      TessDLLLoaded := True;
      // chain the exit handler only once the DLL is fully usable
      SaveExit := ExitProc;
      ExitProc := @NewExit;
    end
    else
    begin
      { Error: TESSDLL.DLL lacks one or more expected exports }
      TessDLLLoaded := False;
      FreeLibrary(DLLHandle);
      DLLHandle := 0;
    end;
  finally
    SetErrorMode(ErrorMode);
  end;
end {LoadDLL};

{ Unit start-up: try to bind TESSDLL.DLL as soon as the unit is initialized. }
initialization
  LoadTessDLL;
end.

================================================
===TesseractOCR.pas ===============================
unit TesseractOCR;
{(C) Copyright 2007, J. Oosting
 Licensed under the Apache License, Version 2.0 (the "License");
 you may not use this file except in compliance with the License.
 You may obtain a copy of the License at
 http://www.apache.org/licenses/LICENSE-2.0
 Unless required by applicable law or agreed to in writing, software
 distributed under the License is distributed on an "AS IS" BASIS,
 WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 See the License for the specific language governing permissions and
 limitations under the License. }
interface
uses Windows, SysUtils,Classes,Graphics,TESSDLL;

type EOCRDLL = class (SysUtils.Exception);

type
  { Runs OCR on a private copy of a bitmap through the TessDll* entry points
    and returns the recognised text as a string. }
  TRecognizer = class
  private
    FBitmap: TBitmap;                  // working copy of the source picture
    FLanguage: array[0..15] of Char;   // zero-terminated language code
    function GetLanguage: string;
    procedure SetLanguage(const Value: string);
    procedure PrepareOCR;
    procedure ReleaseOCR;
    function ConvertEText_Desc2String(EText_Desc: PETEXT_DESC): string;
  published
    constructor Create(aBitmap: TBitmap);
    destructor Destroy; override;
    function GetAllWords: string;
    function GetABlock(block: TRect): string;
    property Language: string read GetLanguage write SetLanguage;
  end;

  procedure GetOCRLanguages(langs:TStrings);
  function OCRDLLLoaded:boolean;

implementation
uses Forms;

{ Replaces the contents of langs with one entry per "*.inttemp" file found in
  the "tessdata" folder next to the executable. Each entry is the file name
  up to (not including) its first dot. }
procedure GetOCRLanguages(langs: TStrings);
var
  Found: TSearchRec;
  Status: Integer;
  Mask: string;
begin
  langs.Clear;
  // look in <appdir>tessdata for language files
  Mask := ExtractFilePath(Application.Exename) + 'tessdata\*.inttemp';
  Status := FindFirst(Mask, faAnyFile, Found);
  try
    while Status = 0 do
    begin
      langs.Add(Copy(Found.Name, 1, Pos('.', Found.Name) - 1));
      Status := FindNext(Found);
    end;
  finally
    FindClose(Found);
  end;
end;

{ Reports the TessDLLLoaded flag of the TESSDLL unit. }
function OCRDLLLoaded: Boolean;
begin
  OCRDLLLoaded := TessDLLLoaded;
end;
{ TRecognizer }

{ Turns an OCR result buffer into a string.

  For every character record: emits "blanks" spaces, then the character
  code, then one CR/LF pair when formatting bit 64 (end of line) is set and
  two CR/LF pairs when formatting bit 128 (end of paragraph) is set.

  EText_Desc may be nil (no result available); the function then returns an
  empty string instead of dereferencing the pointer. }
function TRecognizer.ConvertEText_Desc2String(EText_Desc: PETEXT_DESC): string;
const
  EndOfLine = 64;
  EndOfParagraph = 128;
  CRLF = #13#10;
var
  i, b: Integer;
  ch: TEANYCODE_CHAR;
begin
  Result := '';
  if EText_Desc = nil then
    Exit;
  for i := 0 to EText_Desc^.count - 1 do
  begin
    ch := EText_Desc^.text[i];
    for b := 0 to ch.blanks - 1 do
      Result := Result + ' ';
    Result := Result + Chr(ch.char_code);
    if (ch.formatting and EndOfLine) = EndOfLine then
      Result := Result + CRLF;
    if (ch.formatting and EndOfParagraph) = EndOfParagraph then
      Result := Result + CRLF + CRLF;
  end;
end;

{ Creates a recognizer working on a private copy of aBitmap, with the
  language preset to 'eng'.
  Raises EOCRDLL when the Tesseract DLL is not loaded. }
constructor TRecognizer.Create(aBitmap: TBitmap);
begin
  inherited Create;
  if not OCRDLLLoaded then
    raise EOCRDLL.Create('Tesseract DLL not loaded');
  // make a copy so bitmap conversions will not change the original picture
  FBitmap := TBitmap.Create;
  FBitmap.Assign(aBitmap);
  FLanguage := 'eng';
end;

{ Frees the private bitmap copy, then runs the inherited destructor. }
destructor TRecognizer.Destroy;
begin
  FBitmap.Free;
  inherited Destroy;
end;

{ Recognises the text inside "block" and returns it as a string.

  The page is set up with PrepareOCR and always closed again with
  ReleaseOCR, even when recognition or conversion raises. When the scan
  line addresses decrease from row 0 to row 1, the vertical coordinates of
  the block are mirrored before they are handed to the DLL. A bitmap with a
  single row has no second scan line to compare with, so no mirroring is
  attempted for it. A nil result from the DLL yields an empty string. }
function TRecognizer.GetABlock(block: TRect): string;
var
  RecognizedText: PETEXT_DESC;
begin
  Result := '';
  PrepareOCR;
  try
    // ScanLine[1] is only valid when there are at least two rows
    if (FBitmap.Height > 1) and
       (Integer(FBitmap.ScanLine[0]) > Integer(FBitmap.ScanLine[1])) then
    begin
      block.Top := FBitmap.Height - block.Top - 1;
      block.Bottom := FBitmap.Height - block.Bottom - 1;
    end;
    RecognizedText := TessDllRecognize_a_Block(block.Left, block.Right,
                                               block.Top, block.Bottom);
    // convert before the page is ended, as the original code did
    if RecognizedText <> nil then
      Result := ConvertEText_Desc2String(RecognizedText);
  finally
    ReleaseOCR;
  end;
end;

{ Recognises the whole bitmap by passing its full extent to GetABlock. }
function TRecognizer.GetAllWords: string;
var
  WholePage: TRect;
begin
  WholePage := Rect(0, 0, FBitmap.Width - 1, FBitmap.Height - 1);
  Result := GetABlock(WholePage);
end;

{ Getter of the Language property: the content of the internal language
  buffer as a string. }
function TRecognizer.GetLanguage: string;
begin
  Result := FLanguage;
end;

{ Converts the private bitmap into a layout the DLL call accepts and starts
  a page with it.

  Steps:
    1. force the bitmap to be a DIB and map its pixel format to 1, 8 or 24
       bits per pixel (unsupported formats are converted; pfCustom raises
       EInvalidGraphic);
    2. derive the width that is reported to the DLL so that it corresponds
       to the 4-byte aligned lines of a Delphi bitmap;
    3. call the BeginPage variant that matches the row order in memory.

  The distance between two scan lines is negative when row 0 has the lowest
  address, so its absolute value is used for the width. A bitmap with a
  single row has no ScanLine[1]; its line size is then computed from the
  4-byte alignment rule and it is passed on like a bottom-up bitmap. }
procedure TRecognizer.PrepareOCR;
var
  bpp: Integer;
  BytesPerLine: Integer;
  VirtualWidth: Integer;
begin
  // make sure bitmap is DIB, will hopefully convert some types of
  // bitmaps to recognizable pixelformats
  FBitmap.HandleType := bmDIB;

  // convert non-supported bitmap formats and determine bpp
  case FBitmap.PixelFormat of
    pf1bit: bpp := 1;
    pf4bit:
      begin
        FBitmap.PixelFormat := pf8bit;
        bpp := 8;
      end;
    pf8bit: bpp := 8;
    pf24bit: bpp := 24;
    pfDevice, pf15bit, pf16bit, pf32bit:
      begin
        FBitmap.PixelFormat := pf24bit;
        bpp := 24;
      end;
  else // pfCustom
    raise EInvalidGraphic.Create('Graphics format not recognized for OCR');
  end;

  // handle different types of bitmaps
  // Bitmaps in Delphi are 4-byte aligned per line, images in
  // Tesseract can be 1-byte aligned
  // make sure that tesseract thinks lines are 4-byte aligned
  if FBitmap.Height > 1 then
    BytesPerLine := Integer(FBitmap.ScanLine[0]) - Integer(FBitmap.ScanLine[1])
  else
    // single row: no second scan line to measure against
    BytesPerLine := ((FBitmap.Width * bpp + 31) div 32) * 4;

  case bpp of
    1: VirtualWidth := Abs(BytesPerLine) * 8;
    8: VirtualWidth := Abs(BytesPerLine);
  else // 24:
    FBitmap.Width := 4 * ((FBitmap.Width + 3) div 4);
    VirtualWidth := FBitmap.Width;
  end;

  if BytesPerLine > 0 then // usually Windows DIB
    TessDllBeginPageLangBPP(VirtualWidth, FBitmap.Height,
                            FBitmap.ScanLine[FBitmap.Height - 1], FLanguage, bpp)
  else // typical TIFF
    TessDllBeginPageUprightBPP(VirtualWidth, FBitmap.Height,
                               FBitmap.ScanLine[0], FLanguage, bpp);
end;

{ Ends the page that PrepareOCR started; the DLL's return value is ignored. }
procedure TRecognizer.ReleaseOCR;
begin
  TessDllEndPage();
end;

{ Setter of the Language property: stores Value in the internal
  zero-terminated buffer.
  The buffer holds 16 characters, so at most 15 characters of Value are
  copied (followed by the terminator); longer values are truncated instead
  of writing past the end of the buffer. }
procedure TRecognizer.SetLanguage(const Value: string);
begin
  StrPLCopy(FLanguage, Value, High(FLanguage));
end;

end.