- Timestamp:
- 07/05/11 15:18:56 (13 years ago)
- File:
-
- 1 edited
Legend:
- Unmodified
- Added
- Removed
-
trunk/EventBenchCore/src/de/ugoe/cs/eventbench/models/Trie.java
r102 r106 6 6 import java.util.LinkedList; 7 7 import java.util.List; 8 import java.util.Set;9 8 10 9 import de.ugoe.cs.util.StringTools; 11 10 12 11 import edu.uci.ics.jung.graph.DelegateTree; 12 import edu.uci.ics.jung.graph.Graph; 13 13 import edu.uci.ics.jung.graph.Tree; 14 14 15 /** 16 * <p> 17 * This class implements a <it>trie</it>, i.e., a tree of sequences that the 18 * occurence of subsequences up to a predefined length. This length is the trie 19 * order. 20 * </p> 21 * 22 * @author Steffen Herbold 23 * 24 * @param <T> 25 * Type of the symbols that are stored in the trie. 26 * 27 * @see TrieNode 28 */ 15 29 public class Trie<T> implements IDotCompatible, Serializable { 16 17 /** 30 31 /** 32 * <p> 18 33 * Id for object serialization. 34 * </p> 19 35 */ 20 36 private static final long serialVersionUID = 1L; 21 37 22 private Set<T> knownSymbols; 23 38 /** 39 * <p> 40 * Collection of all symbols occuring in the trie. 41 * </p> 42 */ 43 private Collection<T> knownSymbols; 44 45 /** 46 * <p> 47 * Reference to the root of the trie. 48 * </p> 49 */ 24 50 private final TrieNode<T> rootNode; 25 51 52 /** 53 * <p> 54 * Contructor. Creates a new Trie. 55 * </p> 56 */ 26 57 public Trie() { 27 58 rootNode = new TrieNode<T>(); 28 59 knownSymbols = new LinkedHashSet<T>(); 29 60 } 30 31 public Set<T> getKnownSymbols() { 61 62 /** 63 * <p> 64 * Returns a collection of all symbols occuring in the trie. 65 * </p> 66 * 67 * @return symbols occuring in the trie 68 */ 69 public Collection<T> getKnownSymbols() { 32 70 return new LinkedHashSet<T>(knownSymbols); 33 71 } 34 35 // trains the current Trie using the given sequence and adds all subsequence of length trieOrder 72 73 /** 74 * <p> 75 * Trains the current trie using the given sequence and adds all subsequence 76 * of length {@code maxOrder}. 77 * </p> 78 * 79 * @param sequence 80 * sequence whose subsequences are added to the trie 81 * @param maxOrder 82 * maximum length of the subsequences added to the trie 83 */ 36 84 public void train(List<T> sequence, int maxOrder) { 37 85 IncompleteMemory<T> latestActions = new IncompleteMemory<T>(maxOrder); 38 int i =0;39 for (T currentEvent : sequence) {86 int i = 0; 87 for (T currentEvent : sequence) { 40 88 latestActions.add(currentEvent); 41 89 knownSymbols.add(currentEvent); 42 90 i++; 43 if ( i>=maxOrder) {91 if (i >= maxOrder) { 44 92 add(latestActions.getLast(maxOrder)); 45 93 } 46 94 } 47 95 int sequenceLength = sequence.size(); 48 for( int j=maxOrder-1 ; j>0 ; j-- ) { 49 add(sequence.subList(sequenceLength-j, sequenceLength)); 50 } 51 } 52 53 54 // increases the counters for each symbol in the subsequence 55 public void add(List<T> subsequence) { 56 if( subsequence!=null && !subsequence.isEmpty() ) { 96 for (int j = maxOrder - 1; j > 0; j--) { 97 add(sequence.subList(sequenceLength - j, sequenceLength)); 98 } 99 } 100 101 /** 102 * <p> 103 * Adds a given subsequence to the trie and increases the counters 104 * accordingly. 105 * </p> 106 * 107 * @param subsequence 108 * subsequence whose counters are increased 109 * @see TrieNode#add(List) 110 */ 111 protected void add(List<T> subsequence) { 112 if (subsequence != null && !subsequence.isEmpty()) { 57 113 knownSymbols.addAll(subsequence); 58 subsequence = new LinkedList<T>(subsequence); 114 subsequence = new LinkedList<T>(subsequence); // defensive copy! 59 115 T firstSymbol = subsequence.get(0); 60 116 TrieNode<T> node = getChildCreate(firstSymbol); … … 63 119 } 64 120 65 protected TrieNode<T> getChildCreate(T symbol) { 121 /** 122 * <p> 123 * Returns the child of the root node associated with the given symbol or 124 * creates it if it does not exist yet. 125 * </p> 126 * 127 * @param symbol 128 * symbol whose node is required 129 * @return node associated with the symbol 130 * @see TrieNode#getChildCreate(Object) 131 */ 132 protected TrieNode<T> getChildCreate(T symbol) { 66 133 return rootNode.getChildCreate(symbol); 67 134 } 68 135 136 /** 137 * <p> 138 * Returns the child of the root node associated with the given symbol or 139 * null if it does not exist. 140 * </p> 141 * 142 * @param symbol 143 * symbol whose node is required 144 * @return node associated with the symbol; null if no such node exists 145 * @see TrieNode#getChild(Object) 146 */ 69 147 protected TrieNode<T> getChild(T symbol) { 70 148 return rootNode.getChild(symbol); 71 149 } 72 150 73 // get the count of "sequence" 151 /** 152 * <p> 153 * Returns the number of occurences of the given sequence. 154 * </p> 155 * 156 * @param sequence 157 * sequence whose number of occurences is required 158 * @return number of occurences of the sequence 159 */ 74 160 public int getCount(List<T> sequence) { 75 161 int count = 0; 76 162 TrieNode<T> node = find(sequence); 77 if ( node!=null) {163 if (node != null) { 78 164 count = node.getCount(); 79 165 } 80 166 return count; 81 167 } 82 83 // get the count of "sequence,follower" 168 169 /** 170 * <p> 171 * Returns the number of occurences of the given prefix and a symbol that 172 * follows it.<br> 173 * Convenience function to simplify usage of {@link #getCount(List)}. 174 * </p> 175 * 176 * @param sequence 177 * prefix of the sequence 178 * @param follower 179 * suffix of the sequence 180 * @return number of occurences of the sequence 181 * @see #getCount(List) 182 */ 84 183 public int getCount(List<T> sequence, T follower) { 85 184 List<T> tmpSequence = new LinkedList<T>(sequence); 86 185 tmpSequence.add(follower); 87 186 return getCount(tmpSequence); 88 89 } 90 187 188 } 189 190 /** 191 * <p> 192 * Searches the trie for a given sequence and returns the node associated 193 * with the sequence or null if no such node is found. 194 * </p> 195 * 196 * @param sequence 197 * sequence that is searched for 198 * @return node associated with the sequence 199 * @see TrieNode#find(List) 200 */ 91 201 public TrieNode<T> find(List<T> sequence) { 92 if ( sequence==null || sequence.isEmpty()) {202 if (sequence == null || sequence.isEmpty()) { 93 203 return rootNode; 94 204 } … … 96 206 TrieNode<T> result = null; 97 207 TrieNode<T> node = getChild(sequenceCopy.get(0)); 98 if ( node!=null) {208 if (node != null) { 99 209 sequenceCopy.remove(0); 100 210 result = node.find(sequenceCopy); … … 102 212 return result; 103 213 } 104 105 // returns all symbols that follow the defined sequence 214 215 /** 216 * <p> 217 * Returns a collection of all symbols that follow a given sequence in the 218 * trie. In case the sequence is not found or no symbols follow the sequence 219 * the result will be empty. 220 * </p> 221 * 222 * @param sequence 223 * sequence whose followers are returned 224 * @return symbols following the given sequence 225 * @see TrieNode#getFollowingSymbols() 226 */ 106 227 public Collection<T> getFollowingSymbols(List<T> sequence) { 107 228 Collection<T> result = new LinkedList<T>(); 108 229 TrieNode<T> node = find(sequence); 109 if ( node!=null) {230 if (node != null) { 110 231 result = node.getFollowingSymbols(); 111 232 } 112 233 return result; 113 234 } 114 115 // longest suffix of context, that is contained in the tree and whose children are leaves 116 // possibly already deprecated 235 236 /** 237 * <p> 238 * Returns the longest suffix of the given context that is contained in the 239 * tree and whose children are leaves. 240 * </p> 241 * 242 * @param context 243 * context whose suffix is searched for 244 * @return longest suffix of the context 245 */ 117 246 public List<T> getContextSuffix(List<T> context) { 118 247 List<T> contextSuffix = new LinkedList<T>(context); // defensive copy 119 248 boolean suffixFound = false; 120 121 while (!suffixFound) {122 if ( contextSuffix.isEmpty()) {249 250 while (!suffixFound) { 251 if (contextSuffix.isEmpty()) { 123 252 suffixFound = true; // suffix is the empty word 124 253 } else { 125 254 TrieNode<T> node = find(contextSuffix); 126 if ( node!=null) {127 if ( !node.getFollowingSymbols().isEmpty()) {255 if (node != null) { 256 if (!node.getFollowingSymbols().isEmpty()) { 128 257 suffixFound = true; 129 258 } 130 259 } 131 if ( !suffixFound) {260 if (!suffixFound) { 132 261 contextSuffix.remove(0); 133 262 } 134 263 } 135 264 } 136 265 137 266 return contextSuffix; 138 267 } 139 140 141 static public class Edge{} 142 268 269 /** 270 * <p> 271 * Helper class for graph visualization of a trie. 272 * </p> 273 * 274 * @author Steffen Herbold 275 * @version 1.0 276 */ 277 static public class Edge { 278 } 279 280 /** 281 * <p> 282 * Helper class for graph visualization of a trie. 283 * </p> 284 * 285 * @author Steffen Herbold 286 * @version 1.0 287 */ 143 288 static public class TrieVertex { 289 290 /** 291 * <p> 292 * Id of the vertex. 293 * </p> 294 */ 144 295 private String id; 296 297 /** 298 * <p> 299 * Contructor. Creates a new TrieVertex. 300 * </p> 301 * 302 * @param id 303 * id of the vertex 304 */ 145 305 protected TrieVertex(String id) { 146 306 this.id = id; 147 307 } 308 309 /** 310 * <p> 311 * Returns the id of the vertex. 312 * </p> 313 * 314 * @see java.lang.Object#toString() 315 */ 316 @Override 148 317 public String toString() { 149 318 return id; 150 319 } 151 320 } 152 321 322 /** 323 * <p> 324 * Returns a {@link Graph} representation of the trie. 325 * </p> 326 * 327 * @return {@link Graph} representation of the trie 328 */ 153 329 protected Tree<TrieVertex, Edge> getGraph() { 154 330 DelegateTree<TrieVertex, Edge> graph = new DelegateTree<TrieVertex, Edge>(); … … 156 332 return graph; 157 333 } 158 334 335 /* 336 * (non-Javadoc) 337 * 338 * @see de.ugoe.cs.eventbench.models.IDotCompatible#getDotRepresentation() 339 */ 159 340 public String getDotRepresentation() { 160 341 StringBuilder stringBuilder = new StringBuilder(); … … 164 345 return stringBuilder.toString(); 165 346 } 166 347 348 /** 349 * <p> 350 * Returns the string representation of the root node. 351 * </p> 352 * 353 * @see TrieNode#toString() 354 * @see java.lang.Object#toString() 355 */ 167 356 @Override 168 357 public String toString() { 169 358 return rootNode.toString(); 170 359 } 171 360 361 /** 362 * <p> 363 * Returns the number of symbols contained in the trie. 364 * </p> 365 * 366 * @return 367 */ 172 368 public int getNumSymbols() { 173 369 return knownSymbols.size();
Note: See TracChangeset
for help on using the changeset viewer.