1
回答
MongoDB Middle Level (大数据量下MapReduce取代GroupBy)

MongoDB中的MapReduce其实更类似于关系型数据库中的GroupBy。

刚做了一个这样的试验:对于大数据量的GroupBy,用MapReduce来做还是比较理想的。首先生成100W条3位随机数字字符串:

  // Seed the RandomNum collection with 1,000,000 documents of the form
  // {_id: i, v1: "ddd"}, where "ddd" is a random 3-character string of digits.
  // `var` is kept on purpose: these snippets are meant to be pasted into the
  // mongo shell, where the same names are re-declared later in the session.
  var digits = "0123456789"; // loop-invariant alphabet, hoisted out of the loop
  for (var i = 0; i < 1000000; i++)
  {
    var tmp = "";
    for (var j = 0; j < 3; j++)
    {
      // Math.floor(Math.random() * n) picks a uniform index in [0, n).
      tmp += digits.charAt(Math.floor(Math.random() * digits.length));
    }
    // One insert per document; _id is the loop counter, v1 is the value grouped on later.
    db.RandomNum.insert({_id: i, v1: tmp});
  }


然后对相同的随机数统计出现次数(Count),所以必须进行GroupBy:


// Map step: for every document, emit its v1 value as the key together with a
// {count: 1} record. The key plays the role of a relational GROUP BY column;
// the second argument is the value to be aggregated (summed, etc.) per key.
// Must stay a regular function (not an arrow) because it reads the document via `this`.
var m = function () {
  var groupKey = this.v1;
  emit(groupKey, { count: 1 });
};
  
// Reduce step: add up the `count` field of every value received for one key.
// Parameters:
//   key    - the grouping key (unused here; the sum does not depend on it)
//   values - array of {count: <number>} records for that key
// Returns {count: <sum>}, i.e. the same shape as the records it consumes, so the
// result can itself be fed back in as one of `values`.
var r = function (key, values) {
  var total = 0;
  for (var i = 0; i < values.length; i++) {
    total += values[i].count;
  }
  return { count: total };
};
 
// Run the map-reduce job over RandomNum with the map function `m` and the
// reduce function `r`; the output option asks for the 'Result' collection to be replaced.
var res = db.RandomNum.mapReduce(m, r, {
  out: { replace: 'Result' }
});

// Browse the grouped counts in the collection named by res.result.
db[res.result].find()


测试了下时间:

// Record the start time; the elapsed-seconds expression at the end of this script subtracts it.
var startTime = new Date();

// Map step: emit each document's v1 value as the grouping key with a {count: 1} record.
// Must stay a regular function (not an arrow) because it reads the document via `this`.
var m = function () {
  var groupKey = this.v1;
  emit(groupKey, { count: 1 });
};

// Reduce step: sum the `count` field over all values received for one key and
// return the sum wrapped as {count: <sum>}. The key itself is not used.
var r = function (key, values) {
  var sum = 0;
  var idx = 0;
  while (idx < values.length) {
    sum += values[idx].count;
    idx += 1;
  }
  return { count: sum };
};

// Run the map-reduce job over RandomNum with the map function `m` and the
// reduce function `r`; the output option asks for the 'Result' collection to be replaced.
var res = db.RandomNum.mapReduce(m, r, {out: {replace: 'Result'}});

// Browse the grouped counts in the collection named by res.result.
// The explicit semicolon matters: the next statement begins with "(", so without
// it the two lines would be parsed as one call expression, `find()(...)`, when
// this snippet is run as a script or pasted as a single multi-line input.
db[res.result].find();

// Elapsed wall-clock time since startTime, in seconds.
(new Date().getTime() - startTime.getTime()) / 1000;

结果如下:

> db[res.result].find()
{ "_id" : "000", "value" : { "count" : 1075 } }
{ "_id" : "001", "value" : { "count" : 1045 } }
{ "_id" : "002", "value" : { "count" : 1022 } }
{ "_id" : "003", "value" : { "count" : 968 } }
{ "_id" : "004", "value" : { "count" : 994 } }
{ "_id" : "005", "value" : { "count" : 1009 } }
{ "_id" : "006", "value" : { "count" : 948 } }
{ "_id" : "007", "value" : { "count" : 1003 } }
{ "_id" : "008", "value" : { "count" : 983 } }
{ "_id" : "009", "value" : { "count" : 993 } }
{ "_id" : "010", "value" : { "count" : 987 } }
{ "_id" : "011", "value" : { "count" : 982 } }
{ "_id" : "012", "value" : { "count" : 957 } }
{ "_id" : "013", "value" : { "count" : 1031 } }
{ "_id" : "014", "value" : { "count" : 971 } }
{ "_id" : "015", "value" : { "count" : 1053 } }
{ "_id" : "016", "value" : { "count" : 974 } }
{ "_id" : "017", "value" : { "count" : 975 } }
{ "_id" : "018", "value" : { "count" : 978 } }
{ "_id" : "019", "value" : { "count" : 1010 } }
has more
> 
> (new Date().getTime()-startTime.getTime())/1000
63.335s
> bye

测试机的性能:





原文链接:http://blog.csdn.net/crazyjixiang/article/details/6619911
举报
晨曦之光
发帖于6年前 1回/622阅
顶部