Glyph.h

00001 # ifndef Glyph_h
00002 # define Glyph_h Glyph_h
00003 
00004 // C Dependencies
00005 //  - FUTURE FIX: these headers are only needed while the C++ interface
00006 //    relies on the underlying C code.
00007 
00008 # include "pnm.h"
00009 # include "gocr.h"  // Box
00010 
00011 // C++ Dependencies
00012 # include <vector>
00013 # include "Element.h"
00014 # include "ImageArgs.h"
00015 # include "GlyphFeatures.h"  // LineType, has-a
00016 
00017 namespace Conjecture {
00018 
// GlyphLoopStart(self, glyph): opens a loop over mutable glyphs.
//   self  - pointer-like expression; `(self)->allGlyphs(vec)` is invoked with
//           a std::vector<Glyph*> (allGlyphs is declared elsewhere and is
//           presumably what fills the vector - confirm against Element).
//   glyph - name of the `Glyph*` variable bound to each vector element in turn.
// The expansion leaves two braces open (the enclosing scope and the `for`
// body); they must be closed with GlyphLoopEnd.
// The helper locals use non-reserved names: identifiers containing a double
// underscore are reserved for the implementation.
00026 # define GlyphLoopStart(self, glyph) \
00027 { \
00028     std::vector<Glyph*> glyphLoopItems_; \
00029     (self)->allGlyphs(glyphLoopItems_); \
00030     for (std::vector<Glyph*>::iterator glyphLoopIt_ = glyphLoopItems_.begin(); glyphLoopIt_ != glyphLoopItems_.end(); ++glyphLoopIt_) { \
00031       Glyph* glyph = *glyphLoopIt_;
00032 
// ConstGlyphLoopStart(self, glyph): opens a loop over read-only glyphs.
//   self  - pointer-like expression; `(self)->allGlyphs(vec)` is invoked with
//           a std::vector<const Glyph*> (allGlyphs is declared elsewhere and
//           is presumably what fills the vector - confirm against Element).
//   glyph - name of the `const Glyph*` variable bound to each vector element
//           in turn.
// The expansion leaves two braces open (the enclosing scope and the `for`
// body); they must be closed with GlyphLoopEnd.
// The helper locals use non-reserved names: identifiers containing a double
// underscore are reserved for the implementation.
00033 # define ConstGlyphLoopStart(self, glyph) \
00034 { \
00035     std::vector<const Glyph*> constGlyphLoopItems_; \
00036     (self)->allGlyphs(constGlyphLoopItems_); \
00037     for (std::vector<const Glyph*>::const_iterator constGlyphLoopIt_ = constGlyphLoopItems_.begin(); constGlyphLoopIt_ != constGlyphLoopItems_.end(); ++constGlyphLoopIt_) { \
00038       const Glyph* glyph = *constGlyphLoopIt_;
00039 
// GlyphLoopEnd: closes the `for` body and the enclosing scope opened by
// GlyphLoopStart or ConstGlyphLoopStart. Each start macro must be paired
// with exactly one GlyphLoopEnd.
00040 # define GlyphLoopEnd \
00041     } \
00042 }
00043 
00044   /*********************************************************************
00045   * \class Glyph
00046   *
00047   * \brief A rectangular region of the overall image that has been
00048   *  identified as representing a single character.
00049   *
00050   * The Box class represents all information associated with the
00051   * graphical representation of a to-be-identified character.  The
00052   * correctness of this box (i.e. whether it really does represent
00053   * a single character) depends on:
00054   *  - the accuracy of line detection algorithms (currently pretty good)
00055   *  - the accuracy of box detecting algorithms (currently ok,
00056   *    improvements possible)
00057   *
00058   * This class is responsible for converting an image of a character
00059   * into a unicode character value, and as such is at the heart of the
00060   * OCR code.
00061   *///******************************************************************
00062 
00063   class Glyph : public Element {
00064   public:
00065     // ********
00066     // Types
          // A wide character paired with an integer weight. Glyph stores these
          // in _charset; presumably each entry is a candidate reading and its
          // match score - confirm against addChar().
00067     class Char {
00068       public:
            // Members are set through the initializer list rather than by
            // assignment in the constructor body.
00069       Char(wchar_t c, int w) : c(c), weight(w) {}
00070       wchar_t c;
00071       int weight;
00072     };
00073 
00074     // *******
00075     // Methods
00076 
          // Constructor; both arguments default to NULL. Defined outside this
          // header - the meaning of 'internal' is not visible here.
00083     Glyph(Element* parent = NULL, void* internal = NULL);
00084 
          // Constructs a Glyph from an existing one (presumably a copy; the
          // definition is not in this header).
00090     Glyph(Glyph* glyph);
00091 
          // Both overloads return this object itself, typed as a Glyph.
00097     virtual Glyph* asGlyph()             { return this; }
00098     virtual const Glyph* asGlyph() const { return this; }
00099 
          // Public setter; forwards to the protected unicodeIs(), which stores
          // the value in _unicode.
00100     inline void setUnicode(wchar_t unicode) { this->unicodeIs(unicode); }
00101 
          // Declared here, defined elsewhere. Defaults: os = std::cerr,
          // indent = "", index = -1.
00102     virtual void printSummary(std::ostream& os = std::cerr, const std::string& indent = "", int index = -1) const;
00103 
          // Declared here, defined elsewhere (presumably adds chr to _charset -
          // confirm in the implementation).
00104     void addChar(const Glyph::Char& chr);
00105 
          // Declared here, defined elsewhere.
00112     GlyphFeatures::LineType lineType() const;
00113 
          // Edge coordinates taken from topleft() / bottomright(); those two
          // are not declared in this header (presumably inherited from Element).
00114     u2 topY() const { return this->topleft().y; }
00115     u2 bottomY() const { return this->bottomright().y; }
00116     u2 leftX() const { return this->topleft().x; }
00117     u2 rightX() const { return this->bottomright().x; }
00118 
00119     // ---------------------------------------------------------------
00120     // DEBUGGING METHODS
00121 
          // Declared here, defined elsewhere.
00131     void writeImage(const std::string& filebase, const ImageArgs& config);
00132 
          // Declared here, defined elsewhere.
00138     virtual void writeText(std::ostream& os) const;
00139 
          // Static test entry point; arguments default to 0 / NULL. Defined
          // elsewhere.
00149     static void test(int argc = 0, const char* argv[] = NULL);
00150 
          // Read-only access to the stored unicode value and the Char list.
00151     wchar_t unicode() const { return this->_unicode; }
00152     const std::vector<Char>& charset() const { return this->_charset; }
00153 
00154     // Accessors
          // best() returns _best; type() returns the class constant Type
          // (ELEMENT_GLYPH).
00155     inline int  best() const { return this->_best; }
00156     virtual int type() const { return Type; }
00157     // we provide 'box' publicly because it is temporary - need to
00158     // remove it anyway, and Page often needs the Box within Glyph.
          // box() reads the pointer, boxIs() replaces it, boxRef() exposes the
          // pointer member itself by reference.
00159     inline const Box* box() const { return this->_box; }
00160     inline void boxIs(Box* box)   { this->_box = box; }
00161     inline Box* & boxRef()        { return this->_box; }
00162 
00163   protected:
00164     // ********
00165     // Methods
00166 
00167     // ********
00168     // Accessors
          // Mutators / mutable references for _best, _unicode and _charset.
00169     inline void bestIs(const int & best) { this->_best = best; }
00170     inline int &           bestRef() { return this->_best; }
00171     wchar_t& unicodeRef() { return this->_unicode; }
00172     void unicodeIs(wchar_t unicode) { this->_unicode = unicode; }
          // NOTE(review): this non-const overload is protected while the const
          // overload is public. Overload resolution runs before access
          // checking, so calling charset() on a non-const Glyph from outside
          // the class selects this overload and fails to compile; such callers
          // must go through a const reference or pointer.
00173     std::vector<Char>& charset() { return this->_charset; }
00174 
00175   private:
00176     // ********
00177     // Methods
00178 
00179     // ********
00180     // Accessors
00181 
00182     // ********
00183     // State
00184     wchar_t           _unicode;
00185 
          // Element type tag returned by type().
00199     static const ElementType Type = ELEMENT_GLYPH;
00200 
00201     // Are any of these really necessary?
00202     int                _best; // index into _charset (best match)
00203     Box*              _box;   // old gocr data-structure
00204     std::vector<Char> _charset;
00205   };
00206 };
00207 
00208 # endif // Glyph_h
00209 

Generated on Wed Jun 14 15:08:02 2006 for Conjecture by  doxygen 1.4.6