1. KeywordsStaticsModel 数据模型
// Fields of the KeywordsStaticsModel document (one statistics record per keyword).
// Each declaration is on its own line so that a trailing `//` comment cannot
// swallow the declarations that follow it.
private String id;
private String keyword;      // the keyword
private String keywordUuid;  // md5 of keyword + datestr

// Search-related counters
private int sessionCount = 0;
private int adsCount = 0;

// Click-related counters
private int clickCount = 0;
private int yahooClick = 0;

private float score;           // ranking score used for sorting
private long createTime;       // creation time
private String createTimeStr;  // records the current access time
2. mapreduce 统计 每个关键字的sessionCount总数
//Query query = new Query(); query.addCriteria(Criteria.where("createTime").gte(timeStart).lt(timeEnd)); String mapFunction = "function(){" + "emit(this.keyword,{" + "sessionCount:this.sessionCount," + "adsCount:this.adsCount," + "amazonClick:this.amazonClick," + "becomeClick:this.becomeClick," + "clickCount:this.clickCount," + "rakutenClick:this.rakutenClick})" + "}"; String reduceFunction = "function(key, values){" + "var x = {sessionCount:0,adsCount:0,clickCount:0,yahooClick:0,yahooActionClick:0};" + "values.forEach(function(val){ " + "x.sessionCount+=val.sessionCount; "+ "x.adsCount += val.adsCount; "+ "x.clickCount += val.clickCount; "+ "});" + "return x;}"; long startTime = System.currentTimeMillis(); MapReduceResults<KeywordsStaticsModel> asd = getMongoTemplate().mapReduce(query, "keywordsStaticsModel", mapFunction, reduceFunction, KeywordsStaticsModel.class); long endTime = System.currentTimeMillis(); System.out.println("spend time ==>"+(endTime-startTime));
3. 使用 group 统计每个关键字的 sessionCount 总数。注意:group 的结果集最多只能包含 20000 个分组(即不同的关键字),超过时请改用 mapReduce。
// Aggregate sessionCount and adsCount per keyword with the MongoDB group
// operation, restricted to documents whose createTime is in [timeStart, timeEnd).
Criteria criteria = Criteria.where("createTime").gte(timeStart).lt(timeEnd);

// Accumulator document that every group starts from: both sums begin at zero.
DBObject dbObject = new BasicDBObject();
dbObject.put("sessionCountSum", 0);
dbObject.put("adsCountSum", 0);

// Reduce step: fold each matching document (obj) into its group's accumulator (prev).
String reduceFunction = "function(obj,prev){"
        + "prev.sessionCountSum += obj.sessionCount;"
        + "prev.adsCountSum += obj.adsCount;"
        + "}";

// Group by the "keyword" field using the accumulator and reducer defined above.
GroupBy groupBy = new GroupBy("keyword")
        .initialDocument(dbObject)
        .reduceFunction(reduceFunction);

GroupByResults<KeywordsStaticsModel> ttt = getMongoTemplate().group(
        criteria, "keywordsStaticsModel", groupBy, KeywordsStaticsModel.class);

// Print the "count" and "keys" entries of the raw command result.
DBObject resultSet = ttt.getRawResults();
System.out.println("count==>" + resultSet.get("count"));
System.out.println("keys==>" + resultSet.get("keys"));