idl.tmt.representation
Class BagOfWordsRepresentationBuilder

java.lang.Object
  |
  +--idl.tmt.representation.BagOfWordsRepresentationBuilder
All Implemented Interfaces:
RepresentationBuilder
Direct Known Subclasses:
BodyTextRepresentationBuilder, H1TextRepresentationBuilder, LinkTextRepresentationBuilder, MetaTextRepresentationBuilder, TitleTextRepresentationBuilder

public abstract class BagOfWordsRepresentationBuilder
extends java.lang.Object
implements RepresentationBuilder

Abstract representation builder that keeps track of the in-process document representations and finalizes the TmtMatrix object. Created on Apr 7, 2004.

Author:
jelsas

Field Summary
private  boolean binarize
           
private  boolean debug
           
private  TmtMatrix myMatrix
           
private  int numDocs
           
private  java.util.HashMap rep
           
private  boolean shareTermlist
           
private  TermList termList
           
protected  TextDocumentParser textParser
           
private  double weight
           
 
Constructor Summary
BagOfWordsRepresentationBuilder()
           
 
Method Summary
protected  void addTermToDocRepresentation(int docID, java.lang.String term)
           
protected  void buildRepresentation()
           
 void cleanup()
           
 TmtMatrix getRepresentation()
           
 TermList getTermList()
           
 double getWeight()
           
 boolean isBinarize()
           
 boolean isDebug()
           
 boolean isShareTermlist()
           
 void setBinarize(boolean binarize)
           
 void setDebug(boolean debug)
           
 void setNumDocuments(int numDocs)
           
 void setShareTermlist(boolean shareTermlist)
           
 void setTermList(TermList termList)
           
 void setTextParser(TextDocumentParser textParser)
           
 void setWeight(double weight)
           
 java.lang.String toString()
           
 
Methods inherited from class java.lang.Object
clone, equals, finalize, getClass, hashCode, notify, notifyAll, registerNatives, wait, wait, wait
 

Field Detail

myMatrix

private TmtMatrix myMatrix

numDocs

private int numDocs

binarize

private boolean binarize

shareTermlist

private boolean shareTermlist

debug

private boolean debug

weight

private double weight

termList

private TermList termList

textParser

protected TextDocumentParser textParser

rep

private java.util.HashMap rep
Constructor Detail

BagOfWordsRepresentationBuilder

public BagOfWordsRepresentationBuilder()
Method Detail

setNumDocuments

public void setNumDocuments(int numDocs)
Specified by:
setNumDocuments in interface RepresentationBuilder

addTermToDocRepresentation

protected void addTermToDocRepresentation(int docID,
                                          java.lang.String term)

buildRepresentation

protected void buildRepresentation()

getRepresentation

public TmtMatrix getRepresentation()
Specified by:
getRepresentation in interface RepresentationBuilder

setTermList

public void setTermList(TermList termList)

getTermList

public TermList getTermList()
Specified by:
getTermList in interface RepresentationBuilder

setBinarize

public void setBinarize(boolean binarize)

isBinarize

public boolean isBinarize()

setWeight

public void setWeight(double weight)
Specified by:
setWeight in interface RepresentationBuilder

getWeight

public double getWeight()
Specified by:
getWeight in interface RepresentationBuilder

toString

public java.lang.String toString()
Overrides:
toString in class java.lang.Object

setShareTermlist

public void setShareTermlist(boolean shareTermlist)

isShareTermlist

public boolean isShareTermlist()

setTextParser

public void setTextParser(TextDocumentParser textParser)

cleanup

public void cleanup()
Specified by:
cleanup in interface RepresentationBuilder

setDebug

public void setDebug(boolean debug)

isDebug

public boolean isDebug()