idl.tmt.representation
Class IndexedCollection
java.lang.Object
|
+--idl.tmt.representation.IndexedCollection
- All Implemented Interfaces:
- java.io.Serializable
- public class IndexedCollection
- extends java.lang.Object
- implements java.io.Serializable
A class that allows us to manage a collection of terms and documents.
- Author:
- miles
- See Also:
- Serialized Form
| Methods inherited from class java.lang.Object |
clone, equals, finalize, getClass, hashCode, notify, notifyAll, registerNatives, toString, wait, wait, wait |
SUFFIX
private static final java.lang.String SUFFIX
collectionName
java.lang.String collectionName
transformations
java.util.LinkedList transformations
termList
TermList termList
docList
DocumentIDMapper docList
urlMap
URLMapper urlMap
documentIndex
java.util.HashMap documentIndex
invertedIndex
java.util.HashMap invertedIndex
matrix
TmtMatrix matrix
documentClassifications
java.util.LinkedList documentClassifications
IndexedCollection
public IndexedCollection(java.lang.String collectionName)
throws java.io.FileNotFoundException,
java.io.IOException,
java.lang.ClassNotFoundException
IndexedCollection
public IndexedCollection(TermList terms,
DocumentIDMapper docIDMapper,
URLMapper ul,
TmtMatrix mat)
saveCollection
public void saveCollection(java.lang.String collectionName)
throws java.io.IOException
saveCollection
private void saveCollection()
throws java.io.IOException
- writes all known information about this collection to disk
- Parameters:
myTerms - a TermList mapping termIDs to Strings; myDocs - a DocIDMapper; myMatrix - a document-term matrix (hopefully sparse)
readCollection
private void readCollection()
throws java.io.FileNotFoundException,
java.io.IOException,
java.lang.ClassNotFoundException
getNumTerms
public int getNumTerms()
getNumDocs
public int getNumDocs()
getDocFile
public java.io.File getDocFile(int docID)
getRemoteURL
public java.net.URL getRemoteURL(java.io.File docFile)
getDocTermIDs
public java.util.ArrayList getDocTermIDs(int docID)
getDocTerms
public java.util.ArrayList getDocTerms(int docID)
getTermDocs
public java.util.ArrayList getTermDocs(int termID)
getTerm
public java.lang.String getTerm(int termID)
getMatrix
public TmtMatrix getMatrix()
setMatrix
public void setMatrix(TmtMatrix m)
setDocumentIDMapper
public void setDocumentIDMapper(DocumentIDMapper d)
getDocumentIDMapper
public DocumentIDMapper getDocumentIDMapper()
setURLMapper
public void setURLMapper(URLMapper u)
getURLMapper
public URLMapper getURLMapper()
setTermList
public void setTermList(TermList t)
getTermList
public TermList getTermList()
createDocumentIndex
private void createDocumentIndex()
addDocumentClassification
public void addDocumentClassification(DocumentClassification dc)
getDocumentClassifications
public java.util.List getDocumentClassifications()
printCollectionStats
public void printCollectionStats()