- Timestamp:
- 10/05/11 00:39:28 (13 years ago)
- Location:
- trunk/EventBenchConsole/src/de/ugoe/cs/eventbench/web
- Files:
-
- 2 edited
Legend:
- Unmodified
- Added
- Removed
-
trunk/EventBenchConsole/src/de/ugoe/cs/eventbench/web/WeblogParser.java
r225 r232 10 10 import java.util.Collection; 11 11 import java.util.HashMap; 12 import java.util.HashSet; 12 13 import java.util.LinkedList; 13 14 import java.util.List; 14 15 import java.util.Map; 16 import java.util.Set; 15 17 16 18 import de.ugoe.cs.eventbench.web.data.WebEvent; … … 69 71 /** 70 72 * <p> 73 * List that stores the users (identified through their cookie id) to each 74 * sequence. 75 * </p> 76 */ 77 private List<String> users; 78 79 /** 80 * <p> 81 * List that stores the frequent users (identified through their cookie id) 82 * to each sequence. 83 * </p> 84 */ 85 private List<String> frequentUsers; 86 87 /** 88 * <p> 89 * Sequences for all frequent users. 90 * </p> 91 */ 92 private List<Collection<List<WebEvent>>> sequencesFrequentUsers; 93 94 /** 95 * <p> 96 * Threshold that defines how many sessions of a user are required to deem 97 * the user frequent. Note that only sessions whose length is in the range of 98 * {@link #minLength} and {@link #maxLength} are counted. 99 * </p> 100 */ 101 private int frequentUsersThreshold = -1; 102 103 /** 104 * <p> 71 105 * Name and path of the robot filter. 72 106 * </p> … … 164 198 public void setUrl(String url) { 165 199 this.url = url; 200 } 201 202 /** 203 * <p> 204 * Sets the threshold for frequent users. 205 * </p> 206 * 207 * @param threshold 208 * threshold value; if the value is <1, the sessions of the 209 * frequent users will not be determined 210 */ 211 public void setFrequentUserThreshold(int threshold) { 212 this.frequentUsersThreshold = threshold; 213 } 214 215 /** 216 * <p> 217 * Returns the IDs of all frequent users. 218 * </p> 219 * 220 * @return IDs of the frequent users 221 */ 222 public List<String> getFrequentUsers() { 223 return frequentUsers; 224 } 225 226 /** 227 * <p> 228 * Returns the sequences of all frequent users. 
229 * </p> 230 * 231 * 232 * @return list of the sequences of all frequent users 233 */ 234 public List<Collection<List<WebEvent>>> getFrequentUserSequences() { 235 return sequencesFrequentUsers; 166 236 } 167 237 … … 192 262 193 263 sequences = new ArrayList<List<WebEvent>>(); 264 users = new ArrayList<String>(); 194 265 195 266 int lineCounter = 0; … … 216 287 if (values.length == 6) { // post vars found 217 288 for (String postVar : values[5].trim().split(" ")) { 218 postedVars.add(postVar); 289 // TODO manual filtering of bad variables, should be 290 // automated 291 if (!postVar.contains("and")) { 292 postedVars.add(postVar); 293 } 219 294 } 220 295 } … … 236 311 cookieSessionMap.put(cookie, sessionIds); 237 312 sequences.add(new LinkedList<WebEvent>()); 313 users.add(cookie); 238 314 } 239 315 Integer lastSessionIndex = sessionIds … … 251 327 newSession.add(event); 252 328 sequences.add(newSession); 329 users.add(cookie); 253 330 } else { 254 331 lastSession.add(event); … … 260 337 } 261 338 } 339 Console.traceln("" + sequences.size() + " user sequences found"); 262 340 pruneSequences(); 263 } 264 265 /** 266 * <p> 267 * Prunes sequences shorter than {@link #minLength}. 341 Console.traceln("" + sequences.size() 342 + " remaining after pruning of sequences shorter than " 343 + minLength); 344 Set<String> uniqueUsers = new HashSet<String>(users); 345 Console.traceln("" + uniqueUsers.size() + " unique users"); 346 if (frequentUsersThreshold > 0) { 347 generateFrequentUserSequences(uniqueUsers); 348 } 349 } 350 351 /** 352 * <p> 353 * Generates the frequent user sequences, according to the threshold 354 * {@link #frequentUsersThreshold}. 
355 * </p> 356 * 357 * @param uniqueUsers 358 * set with all user IDs 359 */ 360 private void generateFrequentUserSequences(Set<String> uniqueUsers) { 361 frequentUsers = new ArrayList<String>(); 362 sequencesFrequentUsers = new ArrayList<Collection<List<WebEvent>>>(); 363 for (String user : uniqueUsers) { 364 List<String> tmp = new ArrayList<String>(); 365 tmp.add(user); 366 List<String> usersCopy = new LinkedList<String>(users); 367 usersCopy.retainAll(tmp); 368 int size = usersCopy.size(); 369 if (size >= frequentUsersThreshold) { 370 frequentUsers.add(user); 371 Collection<List<WebEvent>> sequencesUser = new ArrayList<List<WebEvent>>(); 372 for (int i = 0; i < sequences.size(); i++) { 373 if (users.get(i).equals(user)) { 374 sequencesUser.add(sequences.get(i)); 375 } 376 } 377 sequencesFrequentUsers.add(sequencesUser); 378 379 } 380 } 381 Console.traceln("" + frequentUsers.size() + " users with more than " 382 + frequentUsersThreshold + " sequences"); 383 } 384 385 /** 386 * <p> 387 * Prunes sequences shorter than {@link #minLength} and longer than 388 * {@link #maxLength}. 268 389 * </p> 269 390 */ 270 391 private void pruneSequences() { 271 Console.traceln("" + sequences.size() + " user sequences found");272 // prune sequences shorter than min-length and longer than maxLength273 392 int i = 0; 274 393 while (i < sequences.size()) { … … 276 395 || sequences.get(i).size() > maxLength) { 277 396 sequences.remove(i); 397 users.remove(i); 278 398 } else { 279 399 i++; 280 400 } 281 401 } 282 Console.traceln("" + sequences.size() 283 + " remaining after pruning of sequences shorter than " 284 + minLength); 402 285 403 } 286 404 … … 338 456 for (String paramPair : paramPairs) { 339 457 String[] paramSplit = paramPair.split("="); 340 getVars.add(paramSplit[0]); 458 // TODO manual filtering of bad variables, should be automated 459 if (!paramSplit[0].contains("and")) { 460 getVars.add(paramSplit[0]); 461 } 341 462 } 342 463 } -
trunk/EventBenchConsole/src/de/ugoe/cs/eventbench/web/commands/CMDloadSessionsFromClickstream.java
r226 r232 5 5 import java.security.InvalidParameterException; 6 6 import java.text.ParseException; 7 import java.util.Collection; 7 8 import java.util.List; 8 9 9 10 import de.ugoe.cs.eventbench.data.GlobalDataContainer; 10 11 import de.ugoe.cs.eventbench.web.WeblogParser; 12 import de.ugoe.cs.eventbench.web.data.WebEvent; 11 13 import de.ugoe.cs.util.console.Command; 12 14 import de.ugoe.cs.util.console.Console; … … 38 40 int minLength = -1; 39 41 int maxLength = -1; 40 if( parameters.size()>=3 ) { 42 boolean generateFrequentUsers = false; 43 int frequentUserThreshold = 20; 44 if (parameters.size() >= 3) { 41 45 serverUrl = (String) parameters.get(2); 42 46 } 43 if (parameters.size() >= 5) {47 if (parameters.size() >= 6) { 44 48 timeout = Integer.parseInt((String) parameters.get(3)); 45 49 minLength = Integer.parseInt((String) parameters.get(4)); 46 50 maxLength = Integer.parseInt((String) parameters.get(5)); 47 51 } 52 if (parameters.size() >= 8) { 53 generateFrequentUsers = Boolean.parseBoolean((String) parameters 54 .get(6)); 55 frequentUserThreshold = Integer 56 .parseInt((String) parameters.get(7)); 57 } 48 58 49 59 WeblogParser parser = new WeblogParser(); 50 if ( serverUrl!=null) {60 if (serverUrl != null) { 51 61 parser.setUrl(serverUrl); 52 62 } … … 55 65 parser.setMinLength(minLength); 56 66 parser.setMaxLength(maxLength); 67 } 68 if (generateFrequentUsers) { 69 parser.setFrequentUserThreshold(frequentUserThreshold); 57 70 } 58 71 try { … … 71 84 Console.traceln("Old data \"" + sequencesName + "\" overwritten"); 72 85 } 86 if (generateFrequentUsers) { 87 List<String> frequentUserIDs = parser.getFrequentUsers(); 88 List<Collection<List<WebEvent>>> frequentUserSessions = parser 89 .getFrequentUserSequences(); 90 for (int i = 0; i < frequentUserIDs.size(); i++) { 91 GlobalDataContainer.getInstance().addData( 92 sequencesName + "_" + frequentUserIDs.get(i), 93 frequentUserSessions.get(i)); 94 } 95 } 73 96 } 74 97 … … 80 103 @Override 81 104 public void 
help() { 82 Console.println("Usage: loadSessionsFromClickstream <filename> <sequencesName> {<serverUrl>} {<timeout> <minSessionLength> <maxSessionLength>} ");105 Console.println("Usage: loadSessionsFromClickstream <filename> <sequencesName> {<serverUrl>} {<timeout> <minSessionLength> <maxSessionLength>} {<generateFrequentUsers> <frequentUserThreshold>}"); 83 106 } 84 107
Note: See TracChangeset
for help on using the changeset viewer.