首先用 yum 安装 MongoDB(服务端和客户端):
1yum install -y mongodb-server mongodb
2
3Installing for dependencies:
4 boost-filesystem
5 boost-iostreams
6 boost-program-options
7 boost-system
8 boost-thread
9 gperftools-libs
10 libicu
11 libunwind
12 v8
安装时会顺带装上一堆依赖包:boost 库、icu 库、v8 库、gperftools 库,都是很厉害的库啊!
启动:
1service mongod start
用 mongoimport 把 CSV 文件导入 mydb 库的 prj01 集合(--headerline 表示把首行当作字段名)。数据量很大,共约 88 万条记录:
1[root@ovs-16-11-2 ~]# mongoimport -d mydb -c prj01 --type csv --file opendata_projects.csv --headerline
2connected to: 127.0.0.1
3Thu Jul 28 09:50:47.002 Progress: 39118658/470754183 8%
4Thu Jul 28 09:50:47.002 74400 24800/second
5Thu Jul 28 09:50:50.033 Progress: 80042213/470754183 17%
6Thu Jul 28 09:50:50.033 150700 25116/second
7Thu Jul 28 09:50:53.145 Progress: 108143323/470754183 22%
8Thu Jul 28 09:50:53.145 202700 22522/second
9Thu Jul 28 09:50:56.004 Progress: 149781879/470754183 31%
10Thu Jul 28 09:50:56.004 280000 23333/second
11Thu Jul 28 09:50:59.001 Progress: 179705162/470754183 38%
12Thu Jul 28 09:50:59.001 336200 22413/second
13Thu Jul 28 09:51:03.385 Progress: 212197023/470754183 45%
14Thu Jul 28 09:51:03.385 396400 20863/second
15Thu Jul 28 09:51:06.015 Progress: 236552399/470754183 50%
16Thu Jul 28 09:51:06.015 441700 20077/second
17Thu Jul 28 09:51:09.299 Progress: 264365847/470754183 56%
18Thu Jul 28 09:51:09.299 493300 19732/second
19Thu Jul 28 09:51:12.001 Progress: 304790148/470754183 64%
20Thu Jul 28 09:51:12.001 568500 20303/second
21Thu Jul 28 09:51:15.033 Progress: 323508057/470754183 68%
22Thu Jul 28 09:51:15.033 603300 19461/second
23Thu Jul 28 09:51:18.607 Progress: 361610334/470754183 76%
24Thu Jul 28 09:51:18.607 674200 19829/second
25Thu Jul 28 09:51:21.000 Progress: 393748962/470754183 83%
26Thu Jul 28 09:51:21.000 733700 19829/second
27Thu Jul 28 09:51:24.007 Progress: 427667505/470754183 90%
28Thu Jul 28 09:51:24.007 796900 19922/second
29Thu Jul 28 09:51:27.001 Progress: 459658299/470754183 97%
30Thu Jul 28 09:51:27.001 857300 19937/second
31Thu Jul 28 09:51:27.793 check 9 878853
32Thu Jul 28 09:51:27.979 imported 878852 objects
进入命令行,看看库的整体情况:
1mongo
2use mydb
3show collections
4db.prj01.findOne()
完整结果如下:
1mongo
2MongoDB shell version: 2.4.14
3connecting to: test
4> use mydb
5switched to db mydb
6> show collections
7prj01
8system.indexes
9> db.prj01.findOne()
10{
11 "_id" : ObjectId("579964f41d36d69d1752f82b"),
12 "_projectid" : "7342bd01a2a7725ce033a179d22e382d",
13 "_teacher_acctid" : "5c43ef5eac0f5857c266baa1ccfa3d3f",
14 "_schoolid" : "9e72d6f2f1e9367b578b6479aa5852b7",
15 "school_ncesid" : NumberLong("360009702803"),
16 "school_latitude" : 40.688454,
17 "school_longitude" : -73.910432,
18 "school_city" : "Brooklyn",
19 "school_state" : "NY",
20 "school_zip" : 11207,
21 "school_metro" : "urban",
22 "school_district" : "New York City Dept Of Ed",
23 "school_county" : "Kings (Brooklyn)",
24 "school_charter" : "f",
25 "school_magnet" : "t",
26 "school_year_round" : "f",
27 "school_nlns" : "f",
28 "school_kipp" : "f",
29 "school_charter_ready_promise" : "f",
30 "teacher_prefix" : "Mr.",
31 "teacher_teach_for_america" : "f",
32 "teacher_ny_teaching_fellow" : "f",
33 "primary_focus_subject" : "Other",
34 "primary_focus_area" : "Applied Learning",
35 "secondary_focus_subject" : "",
36 "secondary_focus_area" : "",
37 "resource_type" : "Supplies",
38 "poverty_level" : "highest poverty",
39 "grade_level" : "Grades 6-8",
40 "vendor_shipping_charges" : "",
41 "sales_tax" : "",
42 "payment_processing_charges" : "",
43 "fulfillment_labor_materials" : "",
44 "total_price_excluding_optional_support" : 229,
45 "total_price_including_optional_support" : 279.27,
46 "students_reached" : 0,
47 "total_donations" : 251,
48 "num_donors" : 1,
49 "eligible_double_your_impact_match" : "f",
50 "eligible_almost_home_match" : "f",
51 "funding_status" : "completed",
52 "date_posted" : "2002-09-13 00:00:00",
53 "date_completed" : "2002-09-23 00:00:00",
54 "date_thank_you_packet_mailed" : "2003-01-27 00:00:00",
55 "date_expiration" : "2003-12-31 00:00:00"
56}
57>
字段太多了。如果只想要其中的 6 个字段,可以在 findOne 的第二个参数里指定要返回的字段(1 表示返回,_id:0 表示不返回 _id):
1> db.prj01.findOne({}, {school_state:1, resource_type:1, poverty_level:1, date_posted:1, total_donations:1, funding_status:1, _id:0})
2{
3 "school_state" : "NY",
4 "resource_type" : "Supplies",
5 "poverty_level" : "highest poverty",
6 "total_donations" : 251,
7 "funding_status" : "completed",
8 "date_posted" : "2002-09-13 00:00:00"
9}
装个pymongo
1python -m pip install pymongo
测一下,进入python命令行:
1python
2...
3from pymongo import MongoClient
4MONGODB_HOST = 'localhost'
5MONGODB_PORT = 27017
6DBS_NAME = 'mydb'
7COLLECTION_NAME = 'prj01'
8FIELDS = {'school_state': True, 'resource_type': True, 'poverty_level': True, 'date_posted': True, 'total_donations': True, '_id': False}
9connection = MongoClient(MONGODB_HOST, MONGODB_PORT)
10collection = connection[DBS_NAME][COLLECTION_NAME]
11projects = collection.find(projection=FIELDS)
12for project in projects:
13 print project
14...
15{u'school_state': u'MO', u'date_posted': u'2015-08-18 00:00:00', u'poverty_level': u'highest poverty', u'resource_type': u'Books', u'total_donations': 0}
16...
数据会刷屏一阵子(整个集合的记录都会被打印出来)。其实这已经是一个完整的用 Python 访问 MongoDB 的程序了。
下面我们来完成flask的部分
装个flask
1python -m pip install flask
新建文件夹 flask01,并在其中新建目录 templates,最终结构如下:
1flask01
2├── run.py
3└── templates
4 └── index.html
准备一个首页文件index.html放到目录templates下
1cat index.html
2<h1>Hello World</h1>
准备主程序run.py
"""Minimal Flask application that serves templates/index.html at the site root."""
from flask import Flask
from flask import render_template

app = Flask(__name__)


@app.route("/")
def index():
    """Render the home page from the index.html template."""
    return render_template("index.html")


if __name__ == "__main__":
    # host='0.0.0.0' makes the server reachable from other machines.
    # NOTE(review): debug=True turns on the interactive debugger; together with
    # a public bind address this is only appropriate for local experiments.
    app.run(host='0.0.0.0', port=5000, debug=True)
在 run.py 中再增加一个 URL(/mydb/prj01),用来读取 MongoDB 中的数据:
"""Flask application serving the home page and a JSON dump of mydb.prj01."""
import json

from bson import json_util
from flask import Flask
from flask import Response
from flask import render_template
from pymongo import MongoClient

app = Flask(__name__)

MONGODB_HOST = 'localhost'
MONGODB_PORT = 27017
DBS_NAME = 'mydb'
COLLECTION_NAME = 'prj01'
# Projection passed to find(): True keeps a field, False drops it.
FIELDS = {'school_state': True, 'resource_type': True, 'poverty_level': True, 'date_posted': True, 'total_donations': True, '_id': False}


@app.route("/")
def index():
    """Render the home page from the index.html template."""
    return render_template("index.html")


@app.route("/mydb/prj01")
def mydb_prj01():
    """Return the projected fields of every document in the collection as JSON.

    Returns:
        A response whose body is a JSON array with one object per document
        and whose Content-Type is application/json.
    """
    connection = MongoClient(MONGODB_HOST, MONGODB_PORT)
    try:
        collection = connection[DBS_NAME][COLLECTION_NAME]
        # The cursor is drained into a list, so the whole result set is held
        # in memory before it is serialized.
        projects = list(collection.find(projection=FIELDS))
    finally:
        # Close the client even when the query raises.
        connection.close()
    # json_util.default serializes BSON-specific values that json cannot.
    body = json.dumps(projects, default=json_util.default)
    return Response(body, mimetype='application/json')


if __name__ == "__main__":
    # host='0.0.0.0' makes the server reachable from other machines.
    # NOTE(review): debug=True turns on the interactive debugger; together with
    # a public bind address this is only appropriate for local experiments.
    app.run(host='0.0.0.0', port=5000, debug=True)
运行一下,然后在浏览器中打开 http://<服务器IP>:5000/mydb/prj01 :
1python run.py
2 * Running on http://0.0.0.0:5000/ (Press CTRL+C to quit)
3 * Restarting with stat
4 * Debugger is active!
5 * Debugger pin code: 373-144-494
6172.16.8.1 - - [28/Jul/2016 10:13:27] "GET /mydb/prj01 HTTP/1.1" 200 -
返回的结果是一个 JSON 数组,每个元素对应一条记录:
1[{"school_state": "NY", "date_posted": "2002-09-13 00:00:00", "poverty_level": "highest poverty", "resource_type": "Supplies", "total_donations": 251},
2 {"school_state": "NY", "date_posted": "2002-09-16 00:00:00", "poverty_level": "moderate poverty", "resource_type": "Supplies", "total_donations": 125},
3...
4]