* ---- Start from a clean slate: screen, open tables, memory variables ----
CLEAR
CLOSE ALL
CLEAR ALL

* ---- Data formatting and comparison rules ----
SET CENTURY ON
SET DATE GERMAN
SET DECIMALS TO 7
SET EXACT ON
SET FULLPATH ON

* ---- Keyboard: confirm fields explicitly, no type-ahead buffer, and ----
* ---- function keys F2..F10 assigned to nothing                      ----
SET CONFIRM ON
SET TYPEAHEAD TO 0
SET FUNCTION 2 TO
SET FUNCTION 3 TO
SET FUNCTION 4 TO
SET FUNCTION 5 TO
SET FUNCTION 6 TO
SET FUNCTION 7 TO
SET FUNCTION 8 TO
SET FUNCTION 9 TO
SET FUNCTION 10 TO

* ---- Quiet user interface while the program is running ----
SET HEADING OFF
SET SAFETY OFF
SET STATUS OFF
SET SYSMENU OFF
SET TALK OFF

* ---- Closing the application window ends the program ----
ON SHUTDOWN QUIT
* Default list of permitted punctuation signs, blank-padded on the right to
* a fixed width of 63 characters.
Goodsigns = padr("~!@#$%^&*()-=+\:',./<>?",63)
* A list saved by an earlier session (Goodsign.mem) replaces the default.
IF file("Goodsign.mem")
  RESTORE FROM Goodsign
ENDIF
* ---- Introductory screen --------------------------------------------------
* Each paragraph is written as plain text first; selected words are then
* written again at the same row/column with STYLE "i" so that they appear
* in italics on top of the plain text. The order of the statements and the
* exact column numbers therefore matter.
set heading off
* Paragraph 1 (rows 1-10): what the program does.
@ 01,3 say "   Modify Plain-Prose OCR-Text  is a tiny freeware that allows its"
@ 01,6 say "Modify Plain-Prose OCR-Text" style "i"
@ 02,3 say "user to automatically clean the set of uncommon symbols from OCR- "
@ 03,3 say "generated text, such symbols being unlikely to actually remain in "
@ 04,3 say "the plain English text materials subjected to optical character   "
@ 05,3 say "recognition (OCR). In addition, the user is next offered an editor"
@ 06,3 say "window for further manual correction of the text line by line, and"
@ 07,3 say "then this freeware finally attempts to form the paragraphs as best"
@ 08,3 say "as it can. The generated continuous paragraphs may then be copied "
@ 09,3 say "from Take_text.txt,  and formatted as desired in the final-target "
@ 09,8 say "Take_text.txt," style "i"
@ 10,3 say "word-processor. The cycle may then be repeated by putting in text."
* Paragraph 2 (rows 12-13): where to paste the OCR output.
@ 12,3 say "   To use this, output from the OCR package, say from Softi Free  "
@ 12,57 say "Softi Free" style "i"
@ 13,3 say "OCR, is to be pasted into the Put_text.txt window of this package."
@ 13,3 say "OCR," style "i"
@ 13,33 say "Put_text.txt" style "i"
* Paragraph 3 (rows 15-17): suggested way of working.
@ 15,3 say "   To work on a set of text-images, it'd be advisable to have the "
@ 16,3 say "OCR package, this freeware, the target word-processor & the image-"
@ 17,3 say "viewer all simultaneously opened, and to modify & save one by one!"
* emni is the variable bound to the "OK" push button GET below.
emni = 3
@ 19,3 say "The allowed non-alphabet non-digit signs are (may edit this list):"
@ 19,15 say            "non-alphabet non-digit" style "i"
* The edit field and the button are offered only while Goodsigns contains no
* cased letters (its upper-case and lower-case forms are equal); otherwise
* the READ is skipped and the screen is cleared straight away.
if upper(Goodsigns) = lower(Goodsigns)
  * FUNCTION '* ...' defines a push-button GET with the given caption.
  @ 22,21 get emni function '* OK, Start Working on Text' size 2,27
  * Editable list of the permitted signs.
  @ 20,06 get Goodsigns
  read cycle
endif  
* ---- Leave the intro screen and switch headings and the system menu back on ----
CLEAR
SET HEADING ON
SET SYSMENU ON
SET SYSMENU TO DEFAULT

* Remember the (possibly edited) sign list for the next session. This runs
* before Goodkeys is created, so Goodkeys is not written to the .mem file.
SAVE ALL LIKE Good* TO Goodsign

* .T. while the previously processed line ended with a hyphen.
LastHyph = .F.

* Every character that may survive the clean-up: letters, blank, digits,
* the trimmed sign list, plus the double quote chr(34) and semi-colon chr(59).
Goodkeys = "abcdefghijklmnopqrstuvwxyz ABCDEFGHIJKLMNOPQRSTUVWXYZ1234567890" ;
         + alltrim(Goodsigns) + chr(34) + chr(59)
DO WHILE .T.
* ---- Phase 1: obtain the raw OCR text from the user ----------------------
* ModifOCR holds only two fields: lines (char,240) and End_para (logical).
USE ModifOCR
ZAP
* Start the editor on a fresh copy of the blank template.
COPY FILE BlnkText.txt TO "Put_Text.txt"
MODIFY FILE "Put_Text.txt"
* One record per text line; lines containing nothing but blanks are dropped.
APPEND FROM "Put_Text.txt" SDF
DELETE ALL FOR len(rtrim(lines)) = 0
PACK
* Nothing was pasted: end the program.
IF reccount() = 0
  CLOSE ALL
  CLEAR ALL
  QUIT
ENDIF
* Strip leading blanks from every line.
REPLACE ALL lines WITH ltrim(lines)
@ 5,6 SAY "The total number of lines is: "
?? ltrim(str(reccount()))
* ---- Phase 2: clean every line, character by character -------------------
* For each record of ModifOCR:
*   - a digit 1 or 0 that touches a letter becomes l, o or O;
*   - every character that is not in Goodkeys is removed;
*   - when the previous line ended with a hyphen, the first word of this
*     line is appended to the previous record and removed from this one.
* width_max ends up as the length of the longest cleaned line, measured
* before the hyphen hand-over, and never below 10.
width_max = 10
* Begin every text with a clean hyphen state. Without this, a trailing "-"
* on the last line of the previous text would still be pending for record 1
* of the new text: SKIP -1 then stops at beginning-of-file, and the SKIP and
* REPLACE that follow overwrite record 2 with the text of record 1.
LastHyph = .F.
go top
do while .not. eof()
  @ 8,6 say "Working now on line number: "
  ??ltrim(str(recno()))
  line_now = rtrim(lines)
  char_num = 1
  do while char_num <= len(line_now)
    char_now  = substr(line_now,char_num,1)
    prev_char = right(left(line_now,char_num-1),1)  && empty at the start of the line
    rightpart = right(line_now,len(line_now)-char_num)
    if char_now $ "01"
      * Only a 1 or 0 with a letter directly after or before it is converted.
      if isalpha(left(rightpart,1)) .or. isalpha(prev_char)
        if char_now = "1"
          char_now = "l"
        else
          * 0 becomes a capital O after a blank. At the start of the line
          * prev_char is empty, which under this program's SET EXACT ON
          * also compares equal to a blank.
          char_now = iif(prev_char = " ","O","o")
        endif
        line_now = stuff(line_now,char_num,1,char_now)
      endif
    endif
    if char_now $ Goodkeys
      char_num = char_num + 1
    else
      * Remove the unwanted character; char_num is left unchanged so that
      * the character which moved into this position is examined next.
      line_now = left(line_now,char_num-1) + rightpart
    endif
  enddo
  if len(line_now) > width_max
    width_max = len(line_now)
  endif
  if LastHyph
    * Position of the first blank; one past the end when there is none.
    first_gap = at(" ",line_now + " ")
    LeftWord = left(line_now,first_gap-1)
    line_now = right(line_now,len(line_now)-first_gap)
    * Append the word to the previous record, then come back to this one.
    skip -1
    replace lines with rtrim(lines) + LeftWord
    skip
  endif
  LastHyph = (right(line_now,1) = "-")
  replace lines with line_now
  skip
enddo
* ---- Phase 3: mark the last line of every paragraph ----------------------
* A line shorter than 60% of the longest line is taken as the end of a
* paragraph. A line that became empty during cleaning closes the paragraph
* of the line before it and is itself removed.
width_max = 0.6*width_max
go top
do while .not. eof()
  do case
  case len(rtrim(lines)) = 0
    delete  && physically removed by the PACK below
    * Flag the preceding line, if there is one. When the empty line is the
    * first record, SKIP -1 would stop at beginning-of-file and the record
    * pointer would end up one record too far, so that case is left alone.
    if recno() > 1
      skip -1
      replace end_para with .T.
      skip  && back onto the empty line
    endif
  case len(rtrim(lines)) < width_max
    replace end_para with .T.
  endcase
  * Single advance per record: the line after an empty line is classified
  * like any other, and SKIP is never issued when already at end-of-file.
  skip
enddo
pack
* The very last line always closes a paragraph.
go bottom
replace end_para with .T.
* ---- Phase 4: let the user correct the cleaned lines by hand -------------
CLEAR
GO TOP
* Route the ? output into Edit_Lines.txt instead of the screen.
SET CONSOLE OFF
SET ALTERNATE TO "Edit_Lines.txt"
SET ALTERNATE ON
SCAN
  ? rtrim(lines)
ENDSCAN
SET ALTERNATE OFF
CLOSE ALTERNATE
* Snapshot the table into OCR_copy, then refill ModifOCR from the text
* file once the user has finished editing it.
COPY TO OCR_copy
ZAP
MODIFY FILE "Edit_Lines.txt"
APPEND FROM "Edit_Lines.txt" SDF
* Drop lines containing nothing but blanks and strip leading blanks.
DELETE ALL FOR len(rtrim(lines)) = 0
PACK
REPLACE ALL lines WITH ltrim(lines)
* ---- Phase 5: join the edited lines into paragraphs in Take_Text.txt -----
* Work area 1: ModifOCR, the hand-edited lines.
* Work area 2: OCR_copy, the snapshot whose end_para flags are read by
*              record number.
use
select 2
use OCR_copy
select 1
use ModifOCR
begin_para = .T.
go top
* Route the ? / ?? output into Take_Text.txt instead of the screen.
set console off
set alternate to "Take_Text.txt"
set alternate on
do while .not. eof()
  if begin_para
    * replace lines with stuff( "<p>",4,len(rtrim(lines)),rtrim(lines) )
    ?       && a new paragraph starts on a new output line
  else
    ??" "   && continuation lines are joined with a single blank
  endif
  rn = recno()
  ??rtrim(lines)
  * Look up the end-of-paragraph flag of the same record number in the
  * snapshot. If the user added lines in the editor, the edited table has
  * more records than the snapshot; GO to such a record number would stop
  * the program with "Record is out of range", so those extra lines are
  * treated as not ending a paragraph.
  select 2
  if rn <= reccount()
    go rn
    end2para = end_para
  else
    end2para = .F.
  endif
  select 1
  * replace lines with stuff(rtrim(lines),len(rtrim(lines))+1,4,"</p>")
  begin_para = end2para
  skip
enddo
?
?
set alternate off
close alternate
set console on
* Close the snapshot, empty and close the working table, show the result.
select 2
use 
select 1
zap
use
modify file "Take_Text.txt" noedit
ENDDO
* copy file "ModifOCR.prg" to "d:\backup\sb2vfp\dontopen\ModifOCR.prg"
* copy file "ModifOCR.exe" to "C:\modifocr\ModifOCR.exe"