操作MongoDB数据库

MongoDB特有的特点
类JSON数据格式(BSON)
多级索引
面向文档,模式自由
高可用的复制集
水平扩展
跨平台,多种语言接口
弱事务性(具有原子性,原子操作是针对单个文档的)
适用场景:大数据量、高并发、弱事务性的Web 2.0互联网应用。


链接: https://pan.baidu.com/s/1yGBAYhJjpI7HFc8yzrEq1w
提取码: f663

什么是NoSQL

在Web 2.0时代,传统关系型数据库由于固有的缺陷难以处理大数据,于是出现了新型的数据库。这些数据库不使用SQL接口操作数据,所以称为NoSQL数据库。

NoSQL数据库分类

  1. Key-value:Memcached, Redis
  2. 文档型:MongoDB
  3. 列式:HBase
  4. 图:Neo4j

CAP理论

场景,即讨论背景为分布式系统

  1. Consistency(一致性):所有节点上的数据时刻保持同步
  2. Availability(可用性):每个请求都能接收到一个响应,无论成功还是失败
  3. Partition Tolerance(分区容错性):即使节点之间出现网络分区(消息丢失或延迟),系统也应该能持续提供服务
    CAP理论说的是在一个分布式系统下,不存在任何分布式算法能同时满足以上三条,最多只能同时满足其中两条

NoSQL特点

  1. 模式自由
  2. 逆范式化(允许数据冗余)
  3. 多分区存储
  4. 动态水平扩展
  5. 多副本异步复制
  6. 软事务(最终一致性)
#!/usr/bin/python
# encoding: utf-8


"""
@author: www.ai8py.com
@file: 01mongodemo.py
"""

import pymongo
from pymongo import MongoClient

# Connect to the MongoDB server on localhost and pick the demo collection.
# Attribute access on a collection yields a dotted sub-collection, so
# ``students`` is the collection named 'test.students' in database 'beifeng'.
conn = MongoClient('localhost')
db = conn.beifeng
students = db.test.students

# Optional: raise the write-safety level (acknowledged by the server and
# committed to the journal) by deriving a collection with a write concern.
# from pymongo.write_concern import WriteConcern
# students = students.with_options(write_concern=WriteConcern(w=1, j=True))

# Start from an empty collection so the demo can be re-run.
# delete_many({}) replaces Collection.remove(None), which PyMongo 3 deprecated
# and PyMongo 4 removed; an empty filter matches every document.
students.delete_many({})

# Nested documents: the value of a field may itself be a document.
dasheng = {
    'name': 'dasheng',
    'age': 30,
    'sex': 'm',
    'contact': {
        'email1': 'abc@def.com',
        'email2': 'def@abc.net',
    },
}

bajie = {
    'name': 'bajie',
    'habit': {
        'habit1': 'eat',
        'habit2': 'sleep',
    },
}

# 1. Insert single documents.
students.insert_one(dasheng)

# The result object of the second insert is kept in ``x``.
x = students.insert_one(bajie)

# 2. Bulk insert
from faker import Factory
import random
def getFakeData(n=10):
    """Build ``n`` fake user records for the bulk-insert demo.

    Each record is a dict with the keys ``name``, ``address`` and ``email``
    (produced by the faker generator) and ``age`` (a random integer from 10
    to 40, both ends included).

    :param n: number of records to generate (default 10).
    :return: list of ``n`` dicts.
    """
    fake = Factory.create()
    # Dict values are evaluated top to bottom, so for every record the
    # generators run in the order name, address, email, age.
    return [
        {
            'name': fake.name(),
            'address': fake.address(),
            'email': fake.email(),
            'age': random.randint(10, 40),
        }
        for _ in range(n)
    ]

# Generate the default batch of fake users and insert them with one call.
# ordered=False asks the server to attempt every insert instead of stopping
# at the first failure.
userinfo = getFakeData()
z = students.insert_many(
    userinfo,
    ordered=False,
)

# students.insert_one({'name':'haha'})
#
#
# # 2. Query
import json
from bson import json_util

# cursor=students.find({})
# cursor=students.find({'name':'dasheng'})
# cursor=students.find({'name':{'$in':['dasheng','bajie']}})
#  age greater than 25
# cursor=students.find({'age':{'$gt':25}})
#  and
# cursor=students.find( { 'name':{'$in':['dasheng','bajie']},
#                         'age':{'$gt':25}
#                         }
#                       )
#   or
# cursor=students.find( {'$or':[ {'name':{'$in':['dasheng','bajie']}},
#                                {'age': {'$gt':30}}
#                                ]
#                        }
#                       )
#
# cursor = students.find( {'habit.habit2':'eat'})
# for student in cursor:
#     # print student
#     print json.dumps(student,indent=4,default=json_util.default)



# 3. Update

# $inc: if a document does not have the field yet, the field is added

# students.update_many(
#     {},
#     {'$inc':
#          {'age':2}
#      }
# )

# $min
# students.update_many(
#     {'name':
#          {'$in':['dasheng','bajie']}
#      } ,
#     {'$min':
#          {'age':20}
#      }
# )
# $currentDate
# students.update_many(
#     {'name':
#          {'$in':['dasheng','bajie']}
#      } ,
#     { '$currentDate' :
#           {'create_time':True,
#            'mod_time':{'$type':'timestamp'}
#            }
#      }
# )

# # Replace the whole embedded document
# students.update_one(
#     {'name':'dasheng'},
#     {'$set' :
#          {
#              'contact':{
#                  'email1':'beijing',
#                  'email2':'haidian'
#              }
#          }
#     }
# )
# # Update only some fields of the embedded document
# students.update_one(
#     {'name':'dasheng'},
#     {'$set' : {
#         'contact.email1':'abcd@efg.com'
#     }}
# )

# Delete every document whose name is 'dasheng'.
# delete_many() replaces Collection.remove(), which PyMongo 3 deprecated and
# PyMongo 4 removed; remove() deleted all matching documents by default, so
# delete_many() (not delete_one()) keeps the same effect.
students.delete_many({'name': 'dasheng'})


# Find and update: modify a single document and get it back in one call.
record = students.find_one_and_update(
    {},  # empty filter: every document is a candidate
    {
        '$set': {'locked': 1},
        '$inc': {'age': 2},
    },
    # Only these fields (plus _id) are returned in ``record``.
    projection={'age': True, 'name': True},
    # The descending sort on age decides which candidate is updated.
    sort=[('age', pymongo.DESCENDING)],
    # BEFORE: ``record`` is the document as it was prior to the update.
    return_document=pymongo.ReturnDocument.BEFORE,
)
#!/usr/bin/python
# encoding: utf-8


"""
@author: www.ai8py.com
@file: 02.loadzip.py
"""


import json
from pymongo import MongoClient

# Connect to MongoDB on localhost; ``col`` is the collection 'test.zips'
# in database 'beifeng'.
conn = MongoClient('localhost')
db = conn.beifeng
col = db.test.zips

# Empty the collection first so that reloading does not duplicate documents.
# delete_many({}) replaces Collection.remove(None), which PyMongo 3 deprecated
# and PyMongo 4 removed.
col.delete_many({})

# Each non-empty line of zips.json is parsed as one JSON document and
# inserted. ``with`` closes the file even if parsing or an insert fails.
with open('zips.json') as f:
    for line in f:
        if not line.strip():
            # A blank (e.g. trailing) line would make json.loads raise.
            continue
        x = json.loads(line)
        # print(x)
        col.insert_one(x)
#!/usr/bin/python
# encoding: utf-8


"""
@author: www.ai8py.com
@file: 03.aggdemo.py
"""

from pymongo import MongoClient
from pymongo.write_concern import WriteConcern
from faker import Factory
# Connect to MongoDB on localhost and select the zip-code collection.
conn = MongoClient('localhost')
db = conn['beifeng']
# Subscript form of ``db.test.zips``: the collection is named 'test.zips'.
col = db['test.zips']
# Sort
# cursor = col.aggregate([
#     {'$sort':{'city':1,'state':1}},
#     {'$project':{
#         '_id':0,
#         'state':1,
#         'city':1,
#         'pop':1
#     }}
# ])
# States whose total population exceeds 10 million
# cursor = col.aggregate([
#     { '$group': {'_id':'$state','totalPop' : {'$sum':'$pop'}}},
#     { '$match':{ 'totalPop': {'$gte': 10*1000*1000}}}
# ]
#
# )

# Average city population for each state
# cursor = col.aggregate([
#     { '$group':{'_id':{'state':'$state','city':'$city'},'pop': {'$sum':'$pop'}}},
#     { '$group':{'_id':'$_id.state','avgCityPop':{'$avg':'$pop'}}},
#     {'$sort':{'avgCityPop':-1}}
# ]
# )

# Largest and smallest city (by population) in each state.
cursor = col.aggregate([
    # 1. Sum the population of all zip-code documents per (state, city).
    {'$group': {
        '_id': {'state': '$state', 'city': '$city'},
        'pop': {'$sum': '$pop'},
    }},
    # 2. Sort ascending by population so that, inside every state group,
    #    $first is the smallest city and $last is the biggest one.
    {'$sort': {'pop': 1}},
    # 3. Regroup by state, picking both ends of the sorted sequence.
    {'$group': {
        '_id': '$_id.state',
        'biggestCity': {'$last': '$_id.city'},
        'biggestPop': {'$last': '$pop'},
        'smallestCity': {'$first': '$_id.city'},
        'smallestPop': {'$first': '$pop'},
    }},
    # 4. Reshape the output. The state is copied out of _id before _id is
    #    suppressed; otherwise the rows could not be attributed to a state.
    {'$project': {
        '_id': 0,
        'state': '$_id',
        'biggestCity': {'city': '$biggestCity', 'pop': '$biggestPop'},
        'smallestCity': {'city': '$smallestCity', 'pop': '$smallestPop'},
    }},
])


for doc in cursor:
    # Function-call form works on Python 3 and prints the same on Python 2.
    print(doc)
#!/usr/bin/python
# encoding: utf-8


"""
@author: www.ai8py.com
@file: 04.spaggdemo.py
"""


from pymongo import MongoClient
from pymongo.write_concern import WriteConcern
from faker import Factory
# Connect to MongoDB on localhost; ``col`` is the collection 'test.zips'
# in database 'beifeng'.
conn = MongoClient('localhost')
db = conn.beifeng
col = db.test.zips

# Count the documents that carry a ``state`` field.
# The original call was col.count('state'): a bare string is not a valid
# filter document, and Collection.count() itself is deprecated in PyMongo 3.7
# and removed in PyMongo 4.
# NOTE(review): presumably the intent was "how many documents have a state";
# use count_documents({}) instead if the plain collection size was meant.
# count_documents() needs PyMongo >= 3.7.
count = col.count_documents({'state': {'$exists': True}})
print(count)

# cursor= col.distinct('state')
#
# for doc in cursor:
#     print doc
# print cursor

# func = '''
#     function(cur,result){
#     result.count += 1
#     }
#     '''
# cursor = col.group(
#     {'state':1,'city':1},
#     {},
#     {'count':0},
#     func
# )

# Map step: emit the value 1 once per document, keyed by its (state, city).
mapfunc = '''
   function(){
     emit({'state':this.state,'city':this.city},1);
   }
'''
# Reduce step: add up the emitted values for one key.
# MongoDB may call reduce several times for the same key and feed it the
# results of earlier reduce calls, so the function has to sum the values;
# returning values.length would count partial results instead of documents.
redfunc = '''
   function(key,values){
      return Array.sum(values);
   }
'''

# Run over the whole collection (empty query) and write the per-key counts
# to the collection 'test.results'.
col.map_reduce(mapfunc,redfunc,query={},out='test.results')
# To inspect the output:
# for doc in db['test.results'].find():
#     print(doc)

发表评论

邮箱地址不会被公开。 必填项已用*标注