链接: https://pan.baidu.com/s/1yGBAYhJjpI7HFc8yzrEq1w
提取码: f663
什么是NoSQL
在Web2.0时代,简单的关系型数据库固有的缺陷无法处理大数据,于是出现了新型的数据库,这些数据库不使用SQL接口操作数据库,所以称为NoSQL数据库
NoSQL数据库分类
- Key-value:memcache, redis
- 文档型:Mongodb
- 列式:hbase
- 图:neo4j
CAP理论
场景,即讨论背景为分布式系统
- Consistency:所有节点上的数据时刻保持同步
- Availability:每个请求都能接受到一个响应,无论成功还是失败
- Partition Tolerance:即使节点之间出现网络分区(消息丢失或延迟),系统仍应能持续提供服务
CAP理论说的是在一个分布式系统下,不存在任何分布式算法能同时满足这三个特性,最多只能同时保证其中两个
NoSQL特点
- 模式自由
- 逆范式化(允许数据冗余)
- 多分区存储
- 动态水平扩展
- 多副本异步复制
- 软事务(最终一致性)
#!/usr/bin/python
# encoding: utf-8
"""
@author: www.ai8py.com
@file: 01mongodemo.py

Basic pymongo CRUD demo: single and bulk inserts (bulk rows are
generated with faker), query examples, update-operator examples on
plain and embedded fields, delete, and an atomic find_one_and_update.
"""
import random

import pymongo
from pymongo import MongoClient
from faker import Factory

conn = MongoClient('localhost')
db = conn.beifeng
students = db.test.students

# Write-concern example (kept for reference):
# from pymongo.write_concern import WriteConcern
# students = col.with_options(write_concern=WriteConcern(w=1, j=True))

# Start from an empty collection.  remove() was deprecated in pymongo 3
# and removed in pymongo 4; delete_many({}) is the supported equivalent.
students.delete_many({})

# Documents containing embedded (nested) sub-documents.
dasheng = {
    'name': 'dasheng',
    'age': 30,
    'sex': 'm',
    'contact': {
        'email1': 'abc@def.com',
        'email2': 'def@abc.net',
    },
}
bajie = {
    'name': 'bajie',
    'habit': {
        'habit1': 'eat',
        'habit2': 'sleep',
    },
}

# 1. Insert single records.
students.insert_one(dasheng)
x = students.insert_one(bajie)


def getFakeData(n=10):
    """Return a list of n fake student dicts keyed name/address/email/age."""
    userfaker = Factory.create()
    label = ['name', 'address', 'email', 'age']
    result = []
    for i in range(n):
        row = [userfaker.name(), userfaker.address(), userfaker.email(),
               random.randint(10, 40)]
        result.append(dict(zip(label, row)))
    return result


# 2. Bulk insert; ordered=False lets the server keep inserting the
# remaining documents even if one of them fails.
userinfo = getFakeData()
z = students.insert_many(userinfo, ordered=False)
# students.insert_one({'name': 'haha'})

# 3. Query examples (uncomment one `cursor = ...` line to try it).
import json
from bson import json_util
# cursor = students.find({})
# cursor = students.find({'name': 'dasheng'})
# cursor = students.find({'name': {'$in': ['dasheng', 'bajie']}})
# Age greater than 25:
# cursor = students.find({'age': {'$gt': 25}})
# Implicit AND:
# cursor = students.find({'name': {'$in': ['dasheng', 'bajie']},
#                         'age': {'$gt': 25}})
# OR:
# cursor = students.find({'$or': [{'name': {'$in': ['dasheng', 'bajie']}},
#                                 {'age': {'$gt': 30}}]})
# Query on an embedded-document field with dot notation:
# cursor = students.find({'habit.habit2': 'eat'})
# for student in cursor:
#     # json_util handles BSON types (ObjectId, dates) that plain json cannot.
#     print(json.dumps(student, indent=4, default=json_util.default))

# 4. Update examples.
# $inc adds the field to documents that do not have it yet:
# students.update_many({}, {'$inc': {'age': 2}})
# $min only writes when the new value is smaller than the stored one:
# students.update_many({'name': {'$in': ['dasheng', 'bajie']}},
#                      {'$min': {'age': 20}})
# $currentDate stamps a date (True) or a BSON timestamp ($type):
# students.update_many({'name': {'$in': ['dasheng', 'bajie']}},
#                      {'$currentDate': {'create_time': True,
#                                        'mod_time': {'$type': 'timestamp'}}})
# Replace a whole embedded document:
# students.update_one({'name': 'dasheng'},
#                     {'$set': {'contact': {'email1': 'beijing',
#                                           'email2': 'haidian'}}})
# Update a single field inside an embedded document:
# students.update_one({'name': 'dasheng'},
#                     {'$set': {'contact.email1': 'abcd@efg.com'}})

# 5. Delete.  remove() (deprecated/removed) deleted every matching
# document by default, so delete_many is the faithful replacement.
students.delete_many({'name': 'dasheng'})

# 6. Atomic find-and-update: locks and ages the oldest student; the
# returned document is the pre-update state (ReturnDocument.BEFORE),
# projected down to age and name.
record = students.find_one_and_update(
    {},
    {'$set': {'locked': 1},
     '$inc': {'age': 2}},
    projection={'age': True, 'name': True},
    sort=[('age', pymongo.DESCENDING)],
    return_document=pymongo.ReturnDocument.BEFORE,
)
#!/usr/bin/python
# encoding: utf-8
"""
@author: www.ai8py.com
@file: 02.loadzip.py

Load zips.json (JSON-lines format: one document per line) into the
beifeng/test.zips collection.
"""
import json

from pymongo import MongoClient

conn = MongoClient('localhost')
db = conn.beifeng
col = db.test.zips

# Start from an empty collection.  remove() was deprecated in pymongo 3
# and removed in pymongo 4; delete_many({}) is the supported equivalent.
col.delete_many({})

# `with` guarantees the file is closed even if a line fails to parse
# or an insert raises; the original leaked the handle in that case.
with open('zips.json') as f:
    for line in f:
        col.insert_one(json.loads(line))
#!/usr/bin/python
# encoding: utf-8
"""
@author: www.ai8py.com
@file: 03.aggdemo.py

Aggregation-pipeline examples over the zips collection: sort/project,
group with $match, nested $group for averages, and $first/$last to
find per-state extremes.
"""
from pymongo import MongoClient
from pymongo.write_concern import WriteConcern
from faker import Factory

conn = MongoClient('localhost')
db = conn.beifeng
col = db.test.zips

# Sort, then keep only state/city/pop (drop _id):
# cursor = col.aggregate([
#     {'$sort': {'city': 1, 'state': 1}},
#     {'$project': {'_id': 0, 'state': 1, 'city': 1, 'pop': 1}},
# ])

# States whose total population exceeds ten million:
# cursor = col.aggregate([
#     {'$group': {'_id': '$state', 'totalPop': {'$sum': '$pop'}}},
#     {'$match': {'totalPop': {'$gte': 10 * 1000 * 1000}}},
# ])

# Average city population per state (sum per city first, then average
# those city totals within each state):
# cursor = col.aggregate([
#     {'$group': {'_id': {'state': '$state', 'city': '$city'},
#                 'pop': {'$sum': '$pop'}}},
#     {'$group': {'_id': '$_id.state', 'avgCityPop': {'$avg': '$pop'}}},
#     {'$sort': {'avgCityPop': -1}},
# ])

# Largest and smallest city in each state: total population per city,
# sort ascending, then within each state $first is the smallest city
# and $last the biggest.
cursor = col.aggregate([
    {'$group': {'_id': {'state': '$state', 'city': '$city'},
                'pop': {'$sum': '$pop'}}},
    {'$sort': {'pop': 1}},
    {'$group': {'_id': '$_id.state',
                'biggestCity': {'$last': '$_id.city'},
                'biggestPop': {'$last': '$pop'},
                'smallestCity': {'$first': '$_id.city'},
                'smallestPop': {'$first': '$pop'}}},
    {'$project': {'_id': 0,
                  'biggestCity': {'city': '$biggestCity',
                                  'pop': '$biggestPop'},
                  'smallestCity': {'city': '$smallestCity',
                                   'pop': '$smallestPop'}}},
])

for doc in cursor:
    # print() call form: the bare `print doc` statement is a
    # SyntaxError on Python 3.
    print(doc)
#!/usr/bin/python
# encoding: utf-8
"""
@author: www.ai8py.com
@file: 04.spaggdemo.py

Single-purpose aggregation examples over the zips collection: count,
distinct, the legacy group() command, and map-reduce.
"""
from pymongo import MongoClient
from pymongo.write_concern import WriteConcern
from faker import Factory

conn = MongoClient('localhost')
db = conn.beifeng
col = db.test.zips

# Count documents.  NOTE(review): the original left `count` undefined
# (its assignment was commented out) while still executing `print count`,
# which raised a NameError -- the pair is now commented out together.
# count() is also deprecated; count_documents({}) is the modern call.
# count = col.count_documents({})
# print(count)

# Distinct values of a field:
# states = col.distinct('state')
# for doc in states:
#     print(doc)
# print(states)

# Legacy group() command (removed in MongoDB 4.2+; kept for reference):
# func = '''
# function(cur,result){
#     result.count += 1
# }
# '''
# cursor = col.group(
#     {'state': 1, 'city': 1},
#     {},
#     {'count': 0},
#     func
# )

# Map-reduce: emit one (state, city) key per document, count the values
# per key, and write the result into the test.results collection.
mapfunc = '''
function(){
emit({'state':this.state,'city':this.city},1);
}
'''
redfunc = '''
function(key,values){
return values.length;
}
'''
col.map_reduce(mapfunc, redfunc, query={}, out='test.results')

# To inspect the output:
# for doc in db.test.results.find():
#     print(doc)