Ignore:
Timestamp:
06/15/11 16:54:11 (13 years ago)
Author:
sherbold
Message:
  • changed the weblog parser such that sessions shorter than a minimum length are pruned, removing single-page visits (e.g., by robots) from the sessions
  • changed loadSessionsFromClickstream such that the session timeout and the minimum session length are optional parameters
File:
1 edited

Legend:

Unmodified
Added
Removed
  • trunk/EventBenchConsole/src/de/ugoe/cs/eventbench/web/WeblogParser.java

    r54 r68  
    1414 
    1515import de.ugoe.cs.eventbench.web.data.WebEvent; 
     16import de.ugoe.cs.util.console.Console; 
    1617 
    1718public class WeblogParser { 
    1819         
    1920        private long timeout; 
     21         
     22        private int minLength = 2; 
    2023         
    2124        private List<List<WebEvent>> sequences; 
     
    3134        public List<List<WebEvent>> getSequences() { 
    3235                return sequences;  
     36        } 
     37         
     38        public void setTimeout(long timeout) { 
     39                this.timeout = timeout; 
     40        } 
     41         
     42        public void setMinLength(int minLength) { 
     43                this.minLength = minLength; 
    3344        } 
    3445         
     
    89100                        } 
    90101                } 
     102                Console.traceln(""+sequences.size()+ " user sequences found"); 
     103                // prune sequences shorter than min-length 
     104                for( int i=0; i<sequences.size(); i++ ) { 
     105                        if( sequences.get(i).size()<minLength ) { 
     106                                sequences.remove(i); 
     107                        } 
     108                } 
     109                Console.traceln(""+sequences.size()+ " remaining after pruning of sequences shorter than " + minLength); 
    91110        } 
    92111} 
Note: See TracChangeset for help on using the changeset viewer.