Jan Oosting Zobrazit profil Přeložit do jazyka: čeština Přeloženo (zobrazit originál) Další možnosti 31 pro 2007, 10:31 I created a couple of units for Delphi that use the tessdll. The first one basically translates the tessdll.h file, the second unit defines a class that can take a bitmap, and OCR it. There is no proper documentation (yet), so you'll have to have a look at the source to be able to use them. The comments show some of the issues that had to be overcome in order to use tessdll from Delphi. Jan ==TESSDLL.PAS====================================================== unit TESSDLL; {**************************************************************************} { } { This C DLL header file first (automatic) conversion generated by: } { HeadConv 4.0 (c) 2000 by Bob Swart (aka Dr.Bob - www.drbob42.com) } { Final Delphi-Jedi (Darth) command-line units edition } { } { Generated Date: 02-08-2007 } { Generated Time: 11:50:26 } { } {**************************************************************************} interface uses Windows; {//////////////////////////////////////////////////////////////////////// } {/// File: tessdll.h } {/// Description: Windows dll interface for Tesseract. } {/// Author: Glen Wernersbach } {/// Created: Tue May 15 10:30:01 PDT 2007 } {/// } {/// (C) Copyright 2007, Jetsoftdev. } {/// Licensed under the Apache License, Version 2.0 (the "License"); } {/// you may not use this file except in compliance with the License. } {/// You may obtain a copy of the License at } {/// http://www.apache.org/licenses/LICENSE-2.0 } {/// Unless required by applicable law or agreed to in writing, software } {/// distributed under the License is distributed on an "AS IS" BASIS, } {/// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. } {/// See the License for the specific language governing permissions and } {/// limitations under the License. } {/// } { Delphi translation: J. 
Oosting {//////////////////////////////////////////////////////////////////////// }

type
  { One recognised character as reported by the OCR engine.

    From version 2.0 onwards char_code is UTF-8 coded: an ASCII character
    arrives as a single record, whereas any other character is returned as
    two or more consecutive records, each carrying one byte of the UTF-8
    sequence and all sharing the same bounding box.  Programs that handle
    languages with other character sets must treat such extended characters
    appropriately, and *all* callers must be prepared to receive UTF-8 coded
    characters for symbols such as bullets and fancy quotes. }
  TEANYCODE_CHAR = packed record
    char_code:  Word;      { the character itself }
    left:       SmallInt;  { left of char (-1) }
    right:      SmallInt;  { right of char (-1) }
    top:        SmallInt;  { top of char (-1) }
    bottom:     SmallInt;  { bottom of char (-1) }
    font_index: SmallInt;  { what font (0) }
    confidence: Byte;      { 0 = perfect, 100 = reject (0/100) }
    point_size: Byte;      { of char, 72 = 1 inch, (10) }
    blanks:     ShortInt;  { number of spaces before this char (1) }
    formatting: Byte;      { char formatting (0) }
  end;

  { Pointer to a single character record. }
  PEANYCODE_CHAR = ^TEANYCODE_CHAR;

{/**********************************************************************
 * ETEXT_DESC
 * Description of the output of the OCR engine.
 * This structure is used as both a progress monitor and the final
 * output header, since it needs to be a valid progress monitor while
 * the OCR engine is storing its output to shared memory.
 * During progress, all the buffer info is -1.
 * Progress starts at 0 and increases to 100 during OCR. No other constraint.
 * Every progress callback, the OCR engine must set ocr_alive to 1.
 * The HP side will set ocr_alive to 0. Repeated failure to reset
 * to 1 indicates that the OCR engine is dead.
 * If the cancel function is not null then it is called with the number of
 * user words found. If it returns true then operation is cancelled.
**********************************************************************/
 typedef bool (*CANCEL_FUNC)(void* cancel_this, int words);}

  { Output header returned by the recognize calls: a few status fields
    followed by a fixed array of up to 32000 character records.  The
    record is deliberately NOT packed, matching the field declaration
    used by the original translation. }
  TETEXT_DESC = record
    count:        SmallInt;  { chars in this buffer (0) }
    progress:     SmallInt;  { percent complete, increasing (0-100) }
    more_to_come: ShortInt;  { true if not last }
    ocr_alive:    ShortInt;  { ocr sets to 1, HP 0 }
    err_code:     ShortInt;  { for errcode use }
    cancel:       Pointer;   { CANCEL_FUNC: returns true to cancel }
    cancel_this:  Pointer;   { this or other data for cancel }
    end_time:     LongInt;   { time to stop if not 0 }
    text:         array[0..31999] of TEANYCODE_CHAR;  { character data }
  end;

  { Pointer to an output header. }
  PETEXT_DESC = ^TETEXT_DESC;

{///The functions below provide a C wrapper to a global recognize class object }
{///xsize should be the width of line in bytes times 8 }
{///ysize is the height }
{///pass through a buffer of bytes for a 1 bit per pixel bitmap }
{///BeginPage assumes the first memory address is the bottom of the image (MS DIB format) }
{///BeginPageUpright assumes the first memory address is the top of the image (TIFF format) }
{///lang is the code of the language for which the data will be loaded. }
{///(Codes follow ISO 639-2.) If it is NULL, english (eng) will be loaded.
}

{ Entry points for starting and ending a page.  Each variable holds the
  address of the DLL export of the same name and stays nil until the DLL
  has been loaded.

  lang is declared as PAnsiChar because the DLL expects a C "const char*".
  On compilers where PChar is an alias for PAnsiChar this is the same
  declaration as before; on Unicode compilers, where PChar means PWideChar,
  it keeps a UTF-16 pointer from being handed to the DLL by mistake. }
var
  TessDllBeginPage: function(xsize: Cardinal; ysize: Cardinal;
    buf: Pointer): LongInt; cdecl;

  TessDllBeginPageLang: function(xsize: Cardinal; ysize: Cardinal;
    buf: Pointer; const lang: PAnsiChar): LongInt; cdecl;

  TessDllBeginPageUpright: function(xsize: Cardinal; ysize: Cardinal;
    buf: Pointer; const lang: PAnsiChar): LongInt; cdecl;

{///Added in version 2.0 to allow users to specify bits per pixel: }
{///1 for binary bitmap }
{///8 for gray }
{///24 bit for color RGB }
  TessDllBeginPageBPP: function(xsize: Cardinal; ysize: Cardinal;
    buf: Pointer; bpp: Byte): LongInt; cdecl;

  TessDllBeginPageLangBPP: function(xsize: Cardinal; ysize: Cardinal;
    buf: Pointer; const lang: PAnsiChar; bpp: Byte): LongInt; cdecl;

  TessDllBeginPageUprightBPP: function(xsize: Cardinal; ysize: Cardinal;
    buf: Pointer; const lang: PAnsiChar; bpp: Byte): LongInt; cdecl;

  TessDllEndPage: function: LongInt; cdecl;

{///This allows you to extract one word or section from the bitmap or }
{///the whole page }
{///To extract the whole page just enter zeros for left, right, top, bottom }
{///Note: getting one word at time is not yet optimized for speed.
}
{///limit of 32000 character can be returned }
{///see ocrclass.h for a description of the ETEXT_DESC file }
var
  TessDllRecognize_a_Block: function(left: Cardinal; right: Cardinal;
    top: Cardinal; bottom: Cardinal): PETEXT_DESC; cdecl;

  TessDllRecognize_all_Words: function: PETEXT_DESC; cdecl;

{///This will release any memory associated with the recognize class object }
  TessDllRelease: function: LongInt; cdecl;

  { True only while TESSDLL.DLL is loaded AND every entry point above has
    been resolved. }
  TessDLLLoaded: Boolean = False;

implementation

var
  DLLHandle: THandle = 0;

{ Resets every imported entry point to nil so that no variable is left
  pointing into a library that has been (or is about to be) unloaded. }
procedure ClearEntryPoints;
begin
  @TessDllBeginPage := nil;
  @TessDllBeginPageLang := nil;
  @TessDllBeginPageUpright := nil;
  @TessDllBeginPageBPP := nil;
  @TessDllBeginPageLangBPP := nil;
  @TessDllBeginPageUprightBPP := nil;
  @TessDllEndPage := nil;
  @TessDllRecognize_a_Block := nil;
  @TessDllRecognize_all_Words := nil;
  @TessDllRelease := nil;
end;

{ Releases the recognizer object and unloads the DLL.  Does nothing when the
  DLL was never (successfully) loaded. }
procedure UnloadTessDLL;
begin
  if not TessDLLLoaded then
    Exit;
  TessDLLLoaded := False;
  if Assigned(TessDllRelease) then
    TessDllRelease;
  FreeLibrary(DLLHandle);
  DLLHandle := 0;
  ClearEntryPoints;
end;

{ Loads TESSDLL.DLL and resolves all entry points.  TessDLLLoaded becomes
  True only when the library was found and every export could be resolved;
  otherwise the library is unloaded again and all entry points stay nil, so
  a missing export can never lead to a call through a nil pointer (the
  previous Assert-based check vanished when assertions were compiled out). }
procedure LoadTessDLL;
var
  PrevErrorMode: UINT;
  AllFound: Boolean;

  { Looks up one export and records a failure in AllFound. }
  function Resolve(ExportName: PAnsiChar): Pointer;
  begin
    Result := GetProcAddress(DLLHandle, ExportName);
    if Result = nil then
      AllFound := False;
  end;

begin
  if TessDLLLoaded then
    Exit;

  { Suppress the "file not found" system dialog while probing for the DLL. }
  PrevErrorMode := SetErrorMode(SEM_NOOPENFILEERRORBOX);
  try
    DLLHandle := LoadLibrary('TESSDLL.DLL');
    { Win32 LoadLibrary reports failure by returning 0. }
    if DLLHandle = 0 then
      Exit;

    AllFound := True;
    @TessDllBeginPage := Resolve('TessDllBeginPage');
    @TessDllBeginPageLang := Resolve('TessDllBeginPageLang');
    @TessDllBeginPageUpright := Resolve('TessDllBeginPageUpright');
    @TessDllBeginPageBPP := Resolve('TessDllBeginPageBPP');
    @TessDllBeginPageLangBPP := Resolve('TessDllBeginPageLangBPP');
    @TessDllBeginPageUprightBPP := Resolve('TessDllBeginPageUprightBPP');
    @TessDllEndPage := Resolve('TessDllEndPage');
    @TessDllRecognize_a_Block := Resolve('TessDllRecognize_a_Block');
    @TessDllRecognize_all_Words := Resolve('TessDllRecognize_all_Words');
    @TessDllRelease := Resolve('TessDllRelease');

    if AllFound then
      TessDLLLoaded := True
    else
    begin
      { Incomplete DLL: treat it exactly like a DLL that could not be loaded. }
      FreeLibrary(DLLHandle);
      DLLHandle := 0;
      ClearEntryPoints;
    end;
  finally
    SetErrorMode(PrevErrorMode);
  end;
end {LoadTessDLL};

initialization
  LoadTessDLL;

finalization
  { Replaces the former ExitProc hook. }
  UnloadTessDLL;

end.
================================================
===TesseractOCR.pas ===============================
unit TesseractOCR;
{(C) Copyright 2007, J. Oosting
 Licensed under the Apache License, Version 2.0 (the "License");
 you may not use this file except in compliance with the License.
 You may obtain a copy of the License at
 http://www.apache.org/licenses/LICENSE-2.0
 Unless required by applicable law or agreed to in writing, software
 distributed under the License is distributed on an "AS IS" BASIS,
 WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 See the License for the specific language governing permissions and
 limitations under the License. }

interface

uses
  Windows, SysUtils, Classes, Graphics, TESSDLL;

type
  { Raised when a recognizer is created while the Tesseract DLL is not loaded. }
  EOCRDLL = class(SysUtils.Exception);

type
  { Runs OCR on a private copy of a bitmap.

    Create copies the bitmap that is passed in, so the pixel-format
    conversions done before recognition never touch the caller's picture.
    Language holds the language code handed to the DLL ('eng' by default,
    at most 15 characters). }
  TRecognizer = class
  private
    FBitmap: TBitmap;
    { Zero-terminated language code in the single-byte form the DLL expects. }
    FLanguage: array[0..15] of AnsiChar;
    function ConvertEText_Desc2String(EText_Desc: PETEXT_DESC): string;
    procedure SetLanguage(const Value: string);
    procedure PrepareOCR;
    procedure ReleaseOCR;
    function GetLanguage: string;
  public
    constructor Create(aBitmap: TBitmap);
    destructor Destroy; override;
    function GetAllWords: string;
    function GetABlock(block: TRect): string;
    property Language: string read GetLanguage write SetLanguage;
  end;

{ Fills langs with the names of the language files found in the "tessdata"
  folder next to the executable. }
procedure GetOCRLanguages(langs: TStrings);

{ Returns True when the Tesseract DLL is loaded and usable. }
function OCRDLLLoaded: Boolean;

implementation

uses
  Forms;

{ Signed distance in bytes from scan line 1 back to scan line 0: positive
  when row 0 lies at the higher address (bottom-up DIB), negative when it
  lies at the lower address (top-down).  Uses PAnsiChar subtraction rather
  than Integer casts so that pointers are never truncated.  The bitmap needs
  at least two rows; otherwise ScanLine raises, as it did before. }
function ScanLineStride(Bmp: TBitmap): Integer;
begin
  Result := Integer(PAnsiChar(Bmp.ScanLine[0]) - PAnsiChar(Bmp.ScanLine[1]));
end;

procedure GetOCRLanguages(langs: TStrings);
var
  sr: TSearchRec;
  dotpos: Integer;
begin
  langs.Clear;
  // look in tessdata for language files; nothing to close when the search
  // never started
  if FindFirst(ExtractFilePath(Application.ExeName) + 'tessdata\*.inttemp',
       faAnyFile, sr) <> 0 then
    Exit;
  try
    repeat
      // the language name is everything before the first dot
      dotpos := Pos('.', sr.Name);
      langs.Add(Copy(sr.Name, 1, dotpos - 1));
    until FindNext(sr) <> 0;
  finally
    SysUtils.FindClose(sr);
  end;
end;

function OCRDLLLoaded: Boolean;
begin
  Result := TessDLLLoaded;
end;

{ TRecognizer }

{ Converts the character records of an OCR result into a string: the leading
  blanks of each character, the character code itself, one CR/LF after a
  character flagged as end of line and two CR/LF after one flagged as end of
  paragraph.  A nil result yields an empty string.
  NOTE: char_code values are appended one by one; per the TEANYCODE_CHAR
  description they are single UTF-8 bytes for non-ASCII text, which this
  routine does not reassemble. }
function TRecognizer.ConvertEText_Desc2String(EText_Desc: PETEXT_DESC): string;
const
  fmtEndOfLine = 64;
  fmtEndOfParagraph = 128;
  CRLF = #13#10;
var
  i: Integer;
  ch: PEANYCODE_CHAR;
begin
  Result := '';
  if EText_Desc = nil then
    Exit;

  for i := 0 to EText_Desc^.count - 1 do
  begin
    ch := @EText_Desc^.text[i];
    // StringOfChar returns '' for a non-positive count
    Result := Result + StringOfChar(' ', ch^.blanks) + Chr(ch^.char_code);
    if (ch^.formatting and fmtEndOfLine) = fmtEndOfLine then
      Result := Result + CRLF;
    if (ch^.formatting and fmtEndOfParagraph) = fmtEndOfParagraph then
      Result := Result + CRLF + CRLF;
  end;
end;

{ Creates a recognizer for a copy of aBitmap with the language set to 'eng'.
  Raises EOCRDLL when the Tesseract DLL is not loaded. }
constructor TRecognizer.Create(aBitmap: TBitmap);
begin
  inherited Create;
  if not OCRDLLLoaded then
    raise EOCRDLL.Create('Tesseract DLL not loaded');
  // make a copy so bitmap conversions will not change the original picture
  FBitmap := TBitmap.Create;
  FBitmap.Assign(aBitmap);
  SetLanguage('eng');
end;

destructor TRecognizer.Destroy;
begin
  FBitmap.Free;
  inherited;
end;

{ Recognizes the text inside block (bitmap coordinates) and returns it as a
  string.  For a bottom-up bitmap the vertical coordinates are mirrored
  before they are passed to the DLL.  The page is always ended again, even
  when recognition or conversion raises. }
function TRecognizer.GetABlock(block: TRect): string;
begin
  PrepareOCR;
  try
    if ScanLineStride(FBitmap) > 0 then
    begin
      block.Top := FBitmap.Height - block.Top - 1;
      block.Bottom := FBitmap.Height - block.Bottom - 1;
    end;
    Result := ConvertEText_Desc2String(
      TessDllRecognize_a_Block(block.Left, block.Right, block.Top, block.Bottom));
  finally
    ReleaseOCR;
  end;
end;

{ Recognizes the whole bitmap. }
function TRecognizer.GetAllWords: string;
begin
  Result := GetABlock(Rect(0, 0, FBitmap.Width - 1, FBitmap.Height - 1));
end;

function TRecognizer.GetLanguage: string;
begin
  Result := string(AnsiString(PAnsiChar(@FLanguage[0])));
end;

{ Brings the bitmap into a pixel format the DLL accepts (1, 8 or 24 bits per
  pixel) and starts a page on it.  Raises EInvalidGraphic for pixel formats
  that cannot be converted. }
procedure TRecognizer.PrepareOCR;
var
  bpp: Integer;
  Stride: Integer;
  VirtualWidth: Integer;
begin
  // make sure bitmap is DIB, will hopefully convert some types of bitmaps
  // to recognizable pixelformats
  FBitmap.HandleType := bmDIB;

  // convert non-supported bitmap formats and determine bpp
  case FBitmap.PixelFormat of
    pf1bit:
      bpp := 1;
    pf4bit, pf8bit:
      begin
        if FBitmap.PixelFormat <> pf8bit then
          FBitmap.PixelFormat := pf8bit;
        bpp := 8;
      end;
    pfDevice, pf15bit, pf16bit, pf24bit, pf32bit:
      begin
        if FBitmap.PixelFormat <> pf24bit then
          FBitmap.PixelFormat := pf24bit;
        bpp := 24;
      end;
  else // pfCustom
    raise EInvalidGraphic.Create('Graphics format not recognized for OCR');
  end;

  // Bitmaps in Delphi are 4-byte aligned per line, images in Tesseract can
  // be 1-byte aligned: make sure that tesseract thinks lines are 4-byte
  // aligned.  For 24 bpp this is done by widening the bitmap to a multiple
  // of 4 pixels.
  if bpp = 24 then
    FBitmap.Width := 4 * ((FBitmap.Width + 3) div 4);

  // Measure the stride only after every change to the bitmap, so that its
  // sign describes the pixel buffer that is actually handed to the DLL.
  Stride := ScanLineStride(FBitmap);

  // The sign of Stride only encodes the row order; the width reported to
  // the DLL must be positive for top-down bitmaps as well.
  case bpp of
    1: VirtualWidth := Abs(Stride) * 8;
    8: VirtualWidth := Abs(Stride);
  else // 24
    VirtualWidth := FBitmap.Width;
  end;

  // The language buffer is passed as an untyped pointer to its first byte,
  // which is accepted whichever pointer-to-char type the import declares.
  if Stride > 0 then // usually Windows DIB
    TessDllBeginPageLangBPP(VirtualWidth, FBitmap.Height,
      FBitmap.ScanLine[FBitmap.Height - 1], Pointer(@FLanguage[0]), bpp)
  else // typical TIFF
    TessDllBeginPageUprightBPP(VirtualWidth, FBitmap.Height,
      FBitmap.ScanLine[0], Pointer(@FLanguage[0]), bpp);
end;

procedure TRecognizer.ReleaseOCR;
begin
  TessDllEndPage;
end;

{ Stores the language code.  Values longer than the buffer allows are
  truncated to 15 characters instead of overrunning FLanguage. }
procedure TRecognizer.SetLanguage(const Value: string);
var
  Code: AnsiString;
  Len: Integer;
begin
  Code := AnsiString(Value);
  Len := Length(Code);
  if Len > High(FLanguage) then
    Len := High(FLanguage); // keep room for the terminating #0
  FillChar(FLanguage, SizeOf(FLanguage), 0);
  if Len > 0 then
    Move(Code[1], FLanguage[0], Len);
end;

end.