# Tesseract training walkthrough for the eng.archhandwriting.exp3 sample.
#
# These commands are interleaved with manual steps, so run them one at a
# time instead of executing this file as a whole. Replace every bracketed
# placeholder (e.g. "[working directory]") with a real path before running;
# the placeholders are quoted so a path containing spaces stays one argument.

# Become admin (starts a shell as root; later commands are typed into it).
sudo -s

# Open jTessBoxEditor.
cd "[directory]"
java -Xms128m -Xmx1024m -jar jTessBoxEditor.jar
cd "[working directory]"

# Make box files.
tesseract --psm 7 eng.archhandwriting.exp3.tiff eng.archhandwriting.exp3 batch.nochop makebox

# MANUAL: edit the box files as necessary.

# Train Tesseract (psm is optional and tells Tesseract how to handle pages).
tesseract --psm 7 eng.archhandwriting.exp3.tiff eng.archhandwriting.exp3 box.train.stderr

# MANUAL: remove lines that say JOINED or BROKEN.

# Generate the unicharset file.
# Continue listing box files on this command line if there is more than one.
unicharset_extractor eng.archhandwriting.exp3.box

# Set unicharset properties.
# NOTE(review): -U is given the "[unicharset directory]" placeholder here;
# confirm whether it should be the path of the unicharset file generated above.
cd "[tesseract directory]"
sudo training/set_unicharset_properties -U "[unicharset directory]" -O new_unicharset --script_dir="[tessdata directory]"

# MANUAL: delete any training files left over from a previous training.

# Run mftraining.
mftraining -F font_properties -U new_unicharset -O eng.unicharset "[working directory]/eng.archhandwriting.exp3.tr"

# Run cntraining.
cntraining "[working directory]/eng.archhandwriting.exp3.tr"

# MANUAL: rename shapetable, normproto, inttemp, pffmtable, unicharset
# (add an eng. prefix to each).

# Next, combine tessdata.
combine_tessdata eng.

# MANUAL: finally, move eng.traineddata to the tessdata directory.

# Test it out.
tesseract "[working directory]/eng.archhandwriting.exp3.tiff" output --psm 7 -l eng