idl.tmt.representation
Class IndexedCollection

java.lang.Object
  |
  +--idl.tmt.representation.IndexedCollection
All Implemented Interfaces:
java.io.Serializable

public class IndexedCollection
extends java.lang.Object
implements java.io.Serializable

A class that allows us to manage a collection of terms and documents.

Author:
miles
See Also:
Serialized Form

Field Summary
(package private)  java.lang.String collectionName
           
(package private)  DocumentIDMapper docList
           
(package private)  java.util.LinkedList documentClassifications
           
(package private)  java.util.HashMap documentIndex
           
(package private)  java.util.HashMap invertedIndex
           
(package private)  TmtMatrix matrix
           
private static java.lang.String SUFFIX
           
(package private)  TermList termList
           
(package private)  java.util.LinkedList transformations
           
(package private)  URLMapper urlMap
           
 
Constructor Summary
IndexedCollection(java.lang.String collectionName)
           
IndexedCollection(TermList terms, DocumentIDMapper docIDMapper, URLMapper ul, TmtMatrix mat)
           
 
Method Summary
 void addDocumentClassification(DocumentClassification dc)
           
private  void createDocumentIndex()
           
 java.io.File getDocFile(int docID)
           
 java.util.ArrayList getDocTermIDs(int docID)
           
 java.util.ArrayList getDocTerms(int docID)
           
 java.util.List getDocumentClassifications()
           
 DocumentIDMapper getDocumentIDMapper()
           
 TmtMatrix getMatrix()
           
 int getNumDocs()
           
 int getNumTerms()
           
 java.net.URL getRemoteURL(java.io.File docFile)
           
 java.lang.String getTerm(int termID)
           
 java.util.ArrayList getTermDocs(int termID)
           
 TermList getTermList()
           
 URLMapper getURLMapper()
           
 void printCollectionStats()
           
private  void readCollection()
           
private  void saveCollection()
          Writes all known information about this collection to disk.
 void saveCollection(java.lang.String collectionName)
           
 void setDocumentIDMapper(DocumentIDMapper d)
           
 void setMatrix(TmtMatrix m)
           
 void setTermList(TermList t)
           
 void setURLMapper(URLMapper u)
           
 
Methods inherited from class java.lang.Object
clone, equals, finalize, getClass, hashCode, notify, notifyAll, registerNatives, toString, wait, wait, wait
 

Field Detail

SUFFIX

private static final java.lang.String SUFFIX

collectionName

java.lang.String collectionName

transformations

java.util.LinkedList transformations

termList

TermList termList

docList

DocumentIDMapper docList

urlMap

URLMapper urlMap

documentIndex

java.util.HashMap documentIndex

invertedIndex

java.util.HashMap invertedIndex

matrix

TmtMatrix matrix

documentClassifications

java.util.LinkedList documentClassifications
Constructor Detail

IndexedCollection

public IndexedCollection(java.lang.String collectionName)
                  throws java.io.FileNotFoundException,
                         java.io.IOException,
                         java.lang.ClassNotFoundException

IndexedCollection

public IndexedCollection(TermList terms,
                         DocumentIDMapper docIDMapper,
                         URLMapper ul,
                         TmtMatrix mat)
Method Detail

saveCollection

public void saveCollection(java.lang.String collectionName)
                    throws java.io.IOException

saveCollection

private void saveCollection()
                     throws java.io.IOException
Writes all known information about this collection to disk.
Parameters:
myTerms - a TermList mapping termIDs to Strings
myDocs - a DocumentIDMapper
myMatrix - a document-term matrix (hopefully sparse)

readCollection

private void readCollection()
                     throws java.io.FileNotFoundException,
                            java.io.IOException,
                            java.lang.ClassNotFoundException

getNumTerms

public int getNumTerms()

getNumDocs

public int getNumDocs()

getDocFile

public java.io.File getDocFile(int docID)

getRemoteURL

public java.net.URL getRemoteURL(java.io.File docFile)

getDocTermIDs

public java.util.ArrayList getDocTermIDs(int docID)

getDocTerms

public java.util.ArrayList getDocTerms(int docID)

getTermDocs

public java.util.ArrayList getTermDocs(int termID)

getTerm

public java.lang.String getTerm(int termID)

getMatrix

public TmtMatrix getMatrix()

setMatrix

public void setMatrix(TmtMatrix m)

setDocumentIDMapper

public void setDocumentIDMapper(DocumentIDMapper d)

getDocumentIDMapper

public DocumentIDMapper getDocumentIDMapper()

setURLMapper

public void setURLMapper(URLMapper u)

getURLMapper

public URLMapper getURLMapper()

setTermList

public void setTermList(TermList t)

getTermList

public TermList getTermList()

createDocumentIndex

private void createDocumentIndex()

addDocumentClassification

public void addDocumentClassification(DocumentClassification dc)

getDocumentClassifications

public java.util.List getDocumentClassifications()

printCollectionStats

public void printCollectionStats()