Element.h

00001 # ifndef Conjecture_Element_h
00002 # define Conjecture_Element_h Conjecture_Element_h
00003 
00004 // C++ Dependencies
00005 # include "Root.h"        // parent
00006 # include <list>          // field 'parts'
00007 # include <vector>        // method 'allGlyphs'
00008 # include <iostream>      // cerr
00009 # include "Coord.h"       // Coord
00010 # include "ImageArgs.h"
00011 
00012 // Declaration
00013 namespace Conjecture {
00014     // Forward declarations
00015     class Page;
00016     class Region;
00017     class Line;
00018     class Word;
00019     class Glyph;
00020     class Image;
00021     
00022     // Kinds of Element, one enumerator per concrete subclass.
          // Enumerators take the default values 0 (ELEMENT_PAGE) through
          // 4 (ELEMENT_GLYPH).  Declared as a plain 'enum': the former leading
          // 'typedef' had no declarator, so it introduced no alias and was
          // ignored by compilers with a warning.  The type name 'ElementType'
          // and all enumerator values are unchanged.
00023     enum ElementType {
00024         ELEMENT_PAGE,
00025         ELEMENT_REGION,
00026         ELEMENT_LINE,
00027         ELEMENT_WORD,
00028         ELEMENT_GLYPH
00029     };
00030     
00031     /********************************************************************
00032      * \class Element
00033      *
00034      * \brief Abstract superclass of a collection of classes
00035      * representing a part-whole decomposition of a graphic image into
00036      * smaller and smaller semantic units.
00037      *
00038      * The Element class is at the root of a critical hierarchy of
00039      * subclasses. Certain subclasses (Page and Glyph) represent the
00040      * most important classes in the implementation, and much of their
00041      * functionality is inherited from Element. Other subclasses
00042      * (Region, Line and Word) provide "refinements" but aren't
00043      * strictly necessary to the proper execution of the code.
00044      *
00045      * An Element consists of state and functionality supporting a
00046      * hierarchical decomposition of larger parts into a collection of
00047      * smaller Elements contained within their "parent" part. The
00048      * subclasses of Element have a conceptual "size order", and a
00049      * tree structure links Elements to their parents and to their
00050      * contained sub-images. Pages are the biggest, Regions are
00051      * contained within Pages, Lines are contained within Regions or
00052      * Pages, Words are contained within Lines, Regions, or Pages, and
00053      * Glyphs are contained within Words, Lines, Regions or Pages.
00054      *
00055      * Implementation subtleties:
00056      * --------------------------
00057      * Since Elements are part of a part-whole decomposition into
00058      * smaller and smaller images, the manner in which this
00059      * decomposition is implemented can have significant time and
00060      * space efficiency ramifications.
00061      *
00062      * One strategy would be to have each Element subclass maintain a
00063      * local copy of that portion of the overall image to which it
00064      * applies. Fields 'height' and 'width' would establish the pixel
00065      * dimensions, and a 'data' field could store the actual pixel
00066      * information. For example, each Glyph could maintain a copy of
00067      * that portion of the Page represented by the Glyph.
00068      *
00069      * The problem with the above naive implementation is that it
00070      * incurs more and more memory the more sub-divided an Element
00071      * becomes. Although it is common for Pages to contain just a
00072      * collection of Glyphs, it is also possible for a Page to contain
00073      * Regions that contain Lines that contain Words that contain
00074      * Glyphs (which might contain other Glyphs). In such a situation,
00075      * the pixel data representing an individual Glyph would be copied
00076      * (and maintained in memory) up to 5 times (stored in a Glyph,
00077      * stored in a Word, stored in a Line, stored in a Region, and
00078      * stored in the Page).
00079      *
00080      * The above memory impact can be avoided by taking a different
00081      * approach. Instead of each Element maintaining a separate copy
00082      * of its portion of an image, we note that the largest possible
00083      * Element is a Page, and all other subclasses of Element are
00084      * conceptually contained within a Page. If each Element has
00085      * access to its containing Page, then we could replace the
00086      * 'data' field with a <x,y> coordinate identifying the topleft
00087      * pixel within the Page at which the sub-image begins. In
00088      * addition, we could maintain either a <width,height> pair, or
00089      * another <x,y> pair representing the bottom right corner (either
00090      * one will allow computation of the other). By maintaining
00091      * topleft relative to the Page, we do not need to maintain
00092      * multiple copies of the data (which is what took up most of the
00093      * memory in the first variant above).
00094      *
00095      * Because this second strategy significantly reduces memory
00096      * costs, it is the strategy currently used. However, the
00097      * interface is separate from the implementation, and we can
00098      * change the implementation as desired if it becomes necessary.
00099      *
00100      * An Element has the following fields:
00101      *   parent       --> the parent Element
00102      *   parts        --> the list of sub parts
00103      *   topleft      --> the <x,y> pixel coordinate within the Page image
00104      *                    to which this image belongs of the top-left corner
00105      *                    of this sub image.  <x,y> is <0,0> for Page
00106      *                    instances.
00107      *   bottomright  --> the <x,y> pixel coordinate within the Page image
00108      *                    to which this image belongs of the bottom-right
00109      *                    corner of this sub image.
00110      *
00111      * An Element can establish its Page by traversing its 'parent'
00112      * links until one is found that has no parent (Page instances
00113      * have a NULL parent).
00114      *
00115      *///******************************************************************
00116 
00117     class Element : public Root {
00118       public:
00119         // Types
              // Container type used for the child list: a std::list of raw
              // Element pointers.
00120         typedef std::list<Element*> ElementList;
00121         
              // Constructor taking the parent Element and two Coords named
              // 'topleft' and 'size'.  The body is defined out of line.
              // NOTE(review): the stored state is _topleft/_bottomright, so
              // 'size' is presumably converted to a bottom-right corner --
              // confirm in the implementation.
00140         Element(Element* parent, const Coord& topleft, const Coord& size);
              // Virtual destructor.
              // NOTE(review): whether it deletes the Elements held in _parts
              // is not visible from this header -- confirm ownership.
00141         virtual ~Element();
00142 
              // Sets the invisible flag by forwarding to invisibleIs().  Called
              // with no argument it marks the Element invisible (default true).
00148         inline void markInvisible(bool invisible = true) { this->invisibleIs(invisible); }
00149         
              // Virtual type query returning an int.
              // NOTE(review): presumably one of the ElementType values --
              // confirm in the implementation.
00168          virtual int type() const;
00169         
              // Registers 'part' with this Element; defined out of line.
              // NOTE(review): presumably appends 'part' to the parts list --
              // confirm.
00174          void registerElement(Element* part);
              // First / last entry of the parts list, or NULL when the list
              // is empty.
00175          Element* firstElement() const { return ( this->parts().empty() ) ? NULL : this->parts().front(); }
00176          Element* lastElement() const  { return ( this->parts().empty() ) ? NULL : this->parts().back();  }
00177          
              // Dimensions as a Coord constructed from (width(), height()).
00178          Coord size() const { return Coord( this->width(), this->height() ); }
00179 
              // Two overloads taking a caller-supplied vector of Glyph
              // pointers (mutable and const variants); defined out of line.
              // NOTE(review): presumably gathers every Glyph beneath this
              // Element into 'glyphs' -- confirm.
00188          void allGlyphs(std::vector<Glyph*>& glyphs);
00189          void allGlyphs(std::vector<const Glyph*>& glyphs) const;
00190         
              // Returns a Page pointer; defined out of line.
              // NOTE(review): presumably found by following parent links up
              // to the Element that has no parent -- confirm.
00206         Page* containingPage() const;
00207         
              // Virtual accessor returning a pointer to a const Image.
              // NOTE(review): presumably the image of the containing Page --
              // confirm.
00219         virtual const Image* pageImage() const;
00220 
              // Virtual; writes to the stream 'os'.  The output format is
              // determined by the out-of-line implementation.
00225         virtual void writeText(std::ostream& os) const;
00226         
              // Per-subclass conversion hooks.  Every base-class version
              // returns NULL.
              // NOTE(review): each concrete subclass presumably overrides its
              // own as*() to return 'this' -- confirm in the subclasses.
00235         virtual Page*   asPage()   { return NULL; }
00236         virtual Region* asRegion() { return NULL; }
00237         virtual Line*   asLine()   { return NULL; }
00238         virtual Word*   asWord()   { return NULL; }
00239         virtual Glyph*  asGlyph()  { return NULL; }
00240         
              // Const overloads of the conversion hooks above; these also
              // return NULL in the base class.
00241         virtual const Page*   asPage()   const { return NULL; }
00242         virtual const Region* asRegion() const { return NULL; }
00243         virtual const Line*   asLine()   const { return NULL; }
00244         virtual const Word*   asWord()   const { return NULL; }
00245         virtual const Glyph*  asGlyph()  const { return NULL; }
00246         
              // Vertical extent: bottomright().y - topleft().y + 1.  The "+ 1"
              // makes both corners inclusive, so equal y values give 1.
00251         inline int height() const { return this->bottomright().y - this->topleft().y + 1; }
00252 
              // Horizontal extent: bottomright().x - topleft().x + 1, with the
              // same inclusive-corner convention as height().
00257         inline int width()  const { return this->bottomright().x - this->topleft().x + 1; }
00258         
00259         // **********************************************************
00260         // Debugging methods
00261         // **********************************************************
00262         
              // Defaults: output goes to std::cerr with an empty indent string.
              // NOTE(review): std::string is used in this header but <string>
              // is not among the includes listed at the top; presumably it
              // arrives through another header -- confirm.
00267         void printStructure(std::ostream& os = std::cerr, const std::string& indent = "") const;
00268         
              // Virtual.  Defaults: std::cerr, empty indent, index -1.
              // NOTE(review): the meaning of index -1 is set by the
              // implementation -- confirm.
00273         virtual void printSummary(std::ostream& os = std::cerr, const std::string& indent = "", int index = -1) const;
00274 
              // Takes a directory string and an ImageArgs; defined out of line.
              // NOTE(review): presumably writes glyph images under 'dir' --
              // confirm.
00279         void writeGlyphs(const std::string& dir, const ImageArgs& adj) const;
00280         
              // Virtual identifier, returned by value as a std::string.
00294         virtual std::string id() const;
00295 
              // Returns an int for the given Element; defined out of line.
              // NOTE(review): presumably the position of 'image' within
              // parts(); the not-found result is not visible here -- confirm.
00304         int findIndex(const Element* image) const;
00305 
              // Static test entry point; callable with no arguments
              // (argc defaults to 0, argv to NULL).
00315         static void test(int argc = 0, const char* argv[] = NULL);
00316         
              // Mutable reference to the parts list.  This is public, so
              // callers can modify the list directly.
00317         inline ElementList & partsRef() { return this->_parts; }
00318         
00319         // Accessors
              // Read-only views of the private state.  parent() returns a
              // pointer to const Element.
00320         inline const Coord &   topleft() const { return this->_topleft; }
00321         inline const Coord &   bottomright() const { return this->_bottomright; }
00322         inline const ElementList& parts() const { return this->_parts; }
00323         inline const Element*    parent() const { return this->_parent; }
00324         inline bool  invisible() const { return this->_invisible; }
00325         
00326       protected:
00327         // Accessors
              // Naming pattern used below: <field>Is(value) assigns the field,
              // <field>Ref() returns a mutable reference to it.
00328         inline void            topleftIs(const Coord & topleft) { this->_topleft = topleft; }
00329         inline Coord &         topleftRef() { return this->_topleft; }
00330         inline void            bottomrightIs(const Coord & bottomright) { this->_bottomright = bottomright; }
00331         inline Coord &         bottomrightRef() { return this->_bottomright; }
              // Replaces the whole parts list with a copy of 'parts'.
00332         inline void            partsIs(const ElementList & parts) { this->_parts = parts; }
00333         inline void            parentIs(Element* parent) { this->_parent = parent; }
00334         inline Element* &      parentRef() { return this->_parent; }
00335         inline void            invisibleIs(bool invisible) { this->_invisible = invisible; }
00336         
00337         // Methods
00338         
00339         // Fields
              // Static array of six char pointers, defined out of line.
              // NOTE(review): ElementType declares five enumerators; confirm
              // what the sixth slot holds where this array is defined.
00340         static char* ClassName[6];
00341         
00342       private:
00343         // Accessors
              // Mutable reference to the invisible flag; private, unlike the
              // other *Ref() accessors.
00344         inline bool& invisibleRef() { return this->_invisible; }
00345         
00346         // Methods
00347         
00348         // *******************
00349         // State
00350 
              // Top-left corner; used by width()/height() as an inclusive bound.
00357         Coord           _topleft;
00358         
              // Bottom-right corner; used by width()/height() as an inclusive
              // bound.
00365         Coord           _bottomright;
00366         
              // Parent Element pointer, read through parent() and written
              // through parentIs()/parentRef().
00387         Element*          _parent;
00388 
              // Child Elements, exposed through parts() and partsRef().
00415         ElementList _parts;
00416         
              // Invisibility flag, set through markInvisible()/invisibleIs().
00431         bool            _invisible;
00432     };
00433 };
00434 
00435 # endif // Conjecture_Element_h
00436 

Generated on Wed Jun 14 15:08:02 2006 for Conjecture by  doxygen 1.4.6