共计 10758 个字符,预计需要花费 27 分钟才能阅读完成。
MongoDB是目前最流行的NoSQL数据库之一,使用的是BSON数据格式(Binary JSON,类似JSON)
博主经常用它来存储爬虫抓取的数据 o( ̄┰ ̄*)ゞ
PyMongo
临时使用docker创建一个mongodb实例
[root@master ~]# docker run -itd --name mongo -p 27017:27017 mongo --auth
52ce66638eeb6cea11c1c4bc830f6e611058c92cae65c7350b330e7cd7e3e433
[root@master ~]# docker ps -a | grep mongo
52ce66638eeb mongo "docker-entrypoint.s…" 14 seconds ago Up 13 seconds 0.0.0.0:27017->27017/tcp mongo
[root@master ~]# docker exec -it mongo mongo admin
MongoDB shell version v5.0.8
connecting to: mongodb://127.0.0.1:27017/admin?compressors=disabled&gssapiServiceName=mongodb
{"t":{"$date":"2020-05-02T17:07:48.416Z"},"s":"I", "c":"NETWORK", "id":5693100, "ctx":"js","msg":"Asio socket.set_option failed with std::system_error","attr":{"note":"connect (sync) TCP fast open","option":{"level":6,"name":30,"data":"01 00 00 00"},"error":{"what":"set_option: Protocol not available","message":"Protocol not available","category":"asio.system","value":92}}}
Implicit session: session { "id" : UUID("f50d2ace-54b2-4a18-9ebe-109ef1da2391") }
MongoDB server version: 5.0.8
================
Warning: the "mongo" shell has been superseded by "mongosh",
which delivers improved usability and compatibility.The "mongo" shell has been deprecated and will be removed in
an upcoming release.
For installation instructions, see
https://docs.mongodb.com/mongodb-shell/install/
================
Welcome to the MongoDB shell.
For interactive help, type "help".
For more comprehensive documentation, see
https://docs.mongodb.com/
Questions? Try the MongoDB Developer Community Forums
https://community.mongodb.com
> db.createUser({ user:'admin',pwd:'123456',roles:[ { role:'userAdminAnyDatabase', db: 'admin'},"readWriteAnyDatabase"]})
Successfully added user: {
"user" : "admin",
"roles" : [
{
"role" : "userAdminAnyDatabase",
"db" : "admin"
},
"readWriteAnyDatabase"
]
}
> db.auth('admin', '123456')
1
> exit
bye
使用pymongo驱动来连接Mongodb
pip3 install pymongo
测试demo
[root@master mongodb]# cat >demo1.py<<EOF
#!/usr/bin/python3
import pymongo
myclient = pymongo.MongoClient('mongodb://admin:123456@localhost:27017/')
# 此时创建并不会在mongodb上真实的创建一个runoobdb,只有在插入数据时才会创建该库和集合
mydb = myclient["runoobdb"]
mycol = mydb["sites"]
dblist = myclient.list_database_names()
collist = mydb.list_collection_names()
if "runoobdb" in dblist:
print("数据库已存在!")
else:
print("数据库不存在!")
if "sites" in collist:
print("集合已存在!")
else:
print("集合不存在!")
EOF
# 执行测试
[root@master mongodb]# python3 demo1.py
数据库不存在!
集合不存在!
Pymongo 数据基本操作
增加
增加一条数据
[root@master mongodb]# cat >demo2.py<<EOF
#!/usr/bin/python3
import pymongo
myclient = pymongo.MongoClient("mongodb://admin:123456@localhost:27017/")
mydb = myclient["runoobdb"]
mycol = mydb["sites"]
# 未指定id,则由mongodb自动生成"_id"
mydict = { "name": "RUNOOB", "alexa": "10000", "url": "https://www.runoob.com" }
x = mycol.insert_one(mydict)
print(x)
# 返回对象存在inserted_id属性,该属性值是插入mongodb的唯一"_id"
print(x.inserted_id)
EOF
# 执行测试
[root@master mongodb]# python3 demo2.py
<pymongo.results.InsertOneResult object at 0x7f6d78584b38>
627011d8468e421bd3932e64
插入多条数据
[root@master mongodb]# cat >demo3.py<<EOF
#!/usr/bin/python3
import pymongo
myclient = pymongo.MongoClient("mongodb://admin:123456@localhost:27017/")
mydb = myclient["runoobdb"]
mycol = mydb["sites"]
mylist = [
{ "name": "Taobao", "alexa": "100", "url": "https://www.taobao.com" },
{ "name": "QQ", "alexa": "101", "url": "https://www.qq.com" },
{ "name": "Facebook", "alexa": "10", "url": "https://www.facebook.com" },
{ "name": "知乎", "alexa": "103", "url": "https://www.zhihu.com" },
{ "name": "Github", "alexa": "109", "url": "https://www.github.com" }
]
x = mycol.insert_many(mylist)
# 输出插入的所有文档对应的 _id 值
print(x.inserted_ids)
EOF
# 执行测试
[root@master mongodb]# python3 demo3.py
[ObjectId('62701243450eec8de54febe0'), ObjectId('62701243450eec8de54febe1'), ObjectId('62701243450eec8de54febe2'), ObjectId('62701243450eec8de54febe3'), ObjectId('62701243450eec8de54febe4')]
删除
删除一条数据(注:demo10、demo11 实际是在后文"修改"一节的 demo8、demo9 执行之后才运行的,因此下面输出中的 alexa 已经是更新后的值)
[root@master mongodb]# cat >demo10.py<<EOF
#!/usr/bin/python3
import pymongo
myclient = pymongo.MongoClient("mongodb://admin:123456@localhost:27017/")
mydb = myclient["runoobdb"]
mycol = mydb["sites"]
myquery = { "name": "Taobao" }
mycol.delete_one(myquery)
# 删除后输出
for x in mycol.find():
print(x)
EOF
# 测试
[root@master mongodb]# python3 demo10.py
{'_id': ObjectId('627011d8468e421bd3932e64'), 'name': 'RUNOOB', 'alexa': '12345', 'url': 'https://www.runoob.com'}
{'_id': ObjectId('62701243450eec8de54febe1'), 'name': 'QQ', 'alexa': '101', 'url': 'https://www.qq.com'}
{'_id': ObjectId('62701243450eec8de54febe2'), 'name': 'Facebook', 'alexa': '123', 'url': 'https://www.facebook.com'}
{'_id': ObjectId('62701243450eec8de54febe3'), 'name': '知乎', 'alexa': '103', 'url': 'https://www.zhihu.com'}
{'_id': ObjectId('62701243450eec8de54febe4'), 'name': 'Github', 'alexa': '109', 'url': 'https://www.github.com'}
{'_id': ObjectId('627012e623e1c247c715ced7'), 'name': 'Taobao', 'alexa': '100', 'url': 'https://www.taobao.com'}
{'_id': ObjectId('627012e623e1c247c715ced8'), 'name': 'QQ', 'alexa': '101', 'url': 'https://www.qq.com'}
{'_id': ObjectId('627012e623e1c247c715ced9'), 'name': 'Facebook', 'alexa': '123', 'url': 'https://www.facebook.com'}
{'_id': ObjectId('627012e623e1c247c715ceda'), 'name': '知乎', 'alexa': '103', 'url': 'https://www.zhihu.com'}
{'_id': ObjectId('627012e623e1c247c715cedb'), 'name': 'Github', 'alexa': '109', 'url': 'https://www.github.com'}
删除多条数据(注意:脚本内容含有 "$regex",用 heredoc 生成文件时应把定界符写成 <<'EOF',否则 shell 会把 $regex 当作变量展开为空字符串)
[root@master mongodb]# cat >demo11.py<<EOF
#!/usr/bin/python3
import pymongo
myclient = pymongo.MongoClient("mongodb://admin:123456@localhost:27017/")
mydb = myclient["runoobdb"]
mycol = mydb["sites"]
myquery = { "name": {"$regex": "^F"} }
x = mycol.delete_many(myquery)
print(x.deleted_count, "个文档已删除")
EOF
# 测试
[root@master mongodb]# python3 demo11.py
2 个文档已删除
若向 delete_many() 传入空的查询对象,则会删除该集合内的所有文档,如:mycol.delete_many({})
drop()方法可以删除集合,如:mycol.drop()
修改
更新一条数据(注意:脚本内容含有 "$set",用 heredoc 生成文件时应把定界符写成 <<'EOF',否则 shell 会把 $set 当作变量展开为空字符串)
[root@master mongodb]# cat >demo8.py<<EOF
#!/usr/bin/python3
import pymongo
myclient = pymongo.MongoClient("mongodb://admin:123456@localhost:27017/")
mydb = myclient["runoobdb"]
mycol = mydb["sites"]
myquery = { "alexa": "10000" }
newvalues = { "$set": { "alexa": "12345" } }
mycol.update_one(myquery, newvalues)
# 输出修改后的 "sites" 集合
for x in mycol.find():
print(x)
EOF
# 测试
[root@master mongodb]# python3 demo8.py
{'_id': ObjectId('627011d8468e421bd3932e64'), 'name': 'RUNOOB', 'alexa': '12345', 'url': 'https://www.runoob.com'}
{'_id': ObjectId('62701243450eec8de54febe0'), 'name': 'Taobao', 'alexa': '100', 'url': 'https://www.taobao.com'}
{'_id': ObjectId('62701243450eec8de54febe1'), 'name': 'QQ', 'alexa': '101', 'url': 'https://www.qq.com'}
{'_id': ObjectId('62701243450eec8de54febe2'), 'name': 'Facebook', 'alexa': '10', 'url': 'https://www.facebook.com'}
{'_id': ObjectId('62701243450eec8de54febe3'), 'name': '知乎', 'alexa': '103', 'url': 'https://www.zhihu.com'}
{'_id': ObjectId('62701243450eec8de54febe4'), 'name': 'Github', 'alexa': '109', 'url': 'https://www.github.com'}
{'_id': ObjectId('627012e623e1c247c715ced7'), 'name': 'Taobao', 'alexa': '100', 'url': 'https://www.taobao.com'}
{'_id': ObjectId('627012e623e1c247c715ced8'), 'name': 'QQ', 'alexa': '101', 'url': 'https://www.qq.com'}
{'_id': ObjectId('627012e623e1c247c715ced9'), 'name': 'Facebook', 'alexa': '10', 'url': 'https://www.facebook.com'}
{'_id': ObjectId('627012e623e1c247c715ceda'), 'name': '知乎', 'alexa': '103', 'url': 'https://www.zhihu.com'}
{'_id': ObjectId('627012e623e1c247c715cedb'), 'name': 'Github', 'alexa': '109', 'url': 'https://www.github.com'}
更新多条数据(注意:脚本内容含有 "$regex" 和 "$set",用 heredoc 生成文件时应把定界符写成 <<'EOF',否则 shell 会把它们当作变量展开为空字符串)
[root@master mongodb]# cat >demo9.py<<EOF
#!/usr/bin/python3
import pymongo
myclient = pymongo.MongoClient("mongodb://admin:123456@localhost:27017/")
mydb = myclient["runoobdb"]
mycol = mydb["sites"]
myquery = { "name": { "$regex": "^F" } }
newvalues = { "$set": { "alexa": "123" } }
x = mycol.update_many(myquery, newvalues)
print(x.modified_count, "文档已修改")
EOF
# 测试
[root@master mongodb]# python3 demo9.py
2 文档已修改
查询
查询一条数据
[root@master mongodb]# cat >demo4.py<<EOF
#!/usr/bin/python3
import pymongo
myclient = pymongo.MongoClient("mongodb://admin:123456@localhost:27017/")
mydb = myclient["runoobdb"]
mycol = mydb["sites"]
x = mycol.find_one()
print(x)
EOF
[root@master mongodb]# python3 demo4.py
{'_id': ObjectId('627011d8468e421bd3932e64'), 'name': 'RUNOOB', 'alexa': '10000', 'url': 'https://www.runoob.com'}
查询所有数据
[root@master mongodb]# cat >demo5.py<<EOF
#!/usr/bin/python3
import pymongo
myclient = pymongo.MongoClient("mongodb://admin:123456@localhost:27017/")
mydb = myclient["runoobdb"]
mycol = mydb["sites"]
for x in mycol.find():
print(x)
EOF
# 测试
[root@master mongodb]#
[root@master mongodb]# python3 demo5.py
{'_id': ObjectId('627011d8468e421bd3932e64'), 'name': 'RUNOOB', 'alexa': '10000', 'url': 'https://www.runoob.com'}
{'_id': ObjectId('62701243450eec8de54febe0'), 'name': 'Taobao', 'alexa': '100', 'url': 'https://www.taobao.com'}
{'_id': ObjectId('62701243450eec8de54febe1'), 'name': 'QQ', 'alexa': '101', 'url': 'https://www.qq.com'}
{'_id': ObjectId('62701243450eec8de54febe2'), 'name': 'Facebook', 'alexa': '10', 'url': 'https://www.facebook.com'}
{'_id': ObjectId('62701243450eec8de54febe3'), 'name': '知乎', 'alexa': '103', 'url': 'https://www.zhihu.com'}
{'_id': ObjectId('62701243450eec8de54febe4'), 'name': 'Github', 'alexa': '109', 'url': 'https://www.github.com'}
{'_id': ObjectId('627012e623e1c247c715ced7'), 'name': 'Taobao', 'alexa': '100', 'url': 'https://www.taobao.com'}
{'_id': ObjectId('627012e623e1c247c715ced8'), 'name': 'QQ', 'alexa': '101', 'url': 'https://www.qq.com'}
{'_id': ObjectId('627012e623e1c247c715ced9'), 'name': 'Facebook', 'alexa': '10', 'url': 'https://www.facebook.com'}
{'_id': ObjectId('627012e623e1c247c715ceda'), 'name': '知乎', 'alexa': '103', 'url': 'https://www.zhihu.com'}
{'_id': ObjectId('627012e623e1c247c715cedb'), 'name': 'Github', 'alexa': '109', 'url': 'https://www.github.com'}
查询使用的是 find() 方法,其用法与 mongo shell 中的 db.collection.find() 类似
搜索指定字段
[root@master mongodb]# cat >demo6.py<<EOF
#!/usr/bin/python3
import pymongo
myclient = pymongo.MongoClient("mongodb://admin:123456@localhost:27017/")
mydb = myclient["runoobdb"]
mycol = mydb["sites"]
for x in mycol.find({},{ "_id": 0, "name": 1, "alexa": 1 }):
print(x)
EOF
# 测试
[root@master mongodb]# python3 demo6.py
{'name': 'RUNOOB', 'alexa': '10000'}
{'name': 'Taobao', 'alexa': '100'}
{'name': 'QQ', 'alexa': '101'}
{'name': 'Facebook', 'alexa': '10'}
{'name': '知乎', 'alexa': '103'}
{'name': 'Github', 'alexa': '109'}
{'name': 'Taobao', 'alexa': '100'}
{'name': 'QQ', 'alexa': '101'}
{'name': 'Facebook', 'alexa': '10'}
{'name': '知乎', 'alexa': '103'}
{'name': 'Github', 'alexa': '109'}
返回指定条数数据
[root@master mongodb]# cat >demo7.py<<EOF
import pymongo
myclient = pymongo.MongoClient("mongodb://admin:123456@localhost:27017/")
mydb = myclient["runoobdb"]
mycol = mydb["sites"]
myresult = mycol.find().limit(3)
# 输出结果
for x in myresult:
print(x)
EOF
# 测试
[root@master mongodb]# python3 demo7.py
{'_id': ObjectId('627011d8468e421bd3932e64'), 'name': 'RUNOOB', 'alexa': '10000', 'url': 'https://www.runoob.com'}
{'_id': ObjectId('62701243450eec8de54febe0'), 'name': 'Taobao', 'alexa': '100', 'url': 'https://www.taobao.com'}
{'_id': ObjectId('62701243450eec8de54febe1'), 'name': 'QQ', 'alexa': '101', 'url': 'https://www.qq.com'}
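常用查询示例(注意:除第一条外,下面的条件沿用的是 mongo shell 的写法;在 Python 中字段名和以 $ 开头的操作符都必须加引号,true 要写成 True,例如 { "alexa": { "$gt": 101 } })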
# 正则匹配
myquery = { "name": { "$regex": "^R" } }
myquery = { item: { $not: { $regex: "^p.*" } } }
myquery = { $text: { $search: "\"coffee shop\"" } }
# 判断
myquery = {alexa: {$eq:101} }
myquery = {alexa: {$gt:101} }
myquery = {alexa: {$gte:101} }
myquery = {alexa: {$lte:101} }
myquery = {alexa: {$ne:101} }
myquery = {alexa: {$in:[101,100]} }
myquery = {alexa: {$nin:[101,100]} }
# 逻辑与或
myquery = { $and: [ { price: { $ne: 1.99 } }, { price: { $exists: true } } ] }
myquery = { $or: [ { quantity: { $lt: 20 } }, { price: 10 } ] }
myquery = { $nor: [ { price: 1.99 }, { qty: { $lt: 20 } }, { sale: true } ] }
# 按字段排序, 1 为升序,-1 为降序,默认为升序
mydocs = mycol.find(myquery).sort("alexa", -1)
正文完