blog新地址 Surprised => indulgence => Calm => Lost => Panic =>Calm

Python对Sqlite3数据库操作

Python admin 1200℃ 0评论

创建数据库/插入数据库实例

# 对paper文本进行数据库录入工作和统计年份,并画图
# 2016年07月12日22:17:39
# Linux python3
# author lee

import sqlite3
import os
import time

def createDatabase(dbname):
	if os.path.exists(dbname):
		os.remove(dbname)

	conn = sqlite3.connect(dbname)
	cursor = conn.cursor()

	sql_createDatabase = "create table paper(index_id int primary key, \
	Paper_ID text, \
	Original_Paper_Title text, \
	Normalized_Paper_Title text, \
	Paper_Publish_Year int, \
	Paper_Publish_Date text, \
	Paper_DOI text, \
	Original_Venue_Name text, \
	Normalized_Venue_Name text, \
	Journal_ID_Mapped_To_Venue_Name text, \
	Conference_Series_ID_Mapped_To_Venue_Name text, \
	Paper_Rank text)"
	cursor.execute(sql_createDatabase)
	conn.commit()
	cursor.close()
	conn.close()
	print("datebase created success!")

def insertDatabase(filename, dbname):
	conn = sqlite3.connect(dbname)
	cursor = conn.cursor()

	f = open(filename, "rb")
	line = f.readline().decode('utf-8')
	index = 0
	count_time = 0 # 计时器,没有什么用的
	while line:
		index = index + 1
		line = line.strip("\n")
		rows = line.split("\t")
		cursor.execute("insert into paper(index_id, Paper_ID, Original_Paper_Title, Normalized_Paper_Title, Paper_Publish_Year, \
			Paper_Publish_Date, Paper_DOI, Original_Venue_Name, Normalized_Venue_Name, Journal_ID_Mapped_To_Venue_Name, Conference_Series_ID_Mapped_To_Venue_Name, Paper_Rank) values(?,?,?,?,?,?,?,?,?,?,?,?)", \
		(index, rows[0], rows[1], rows[2], int(rows[3]), rows[4], rows[5], rows[6], rows[7], rows[8], rows[9], rows[10]))
		line = f.readline().decode('utf-8')

		count_time = count_time + 1
		if count_time > 5000000:
			print("There 5000000.")
			count_time = 0

	conn.commit()
	cursor.close()
	conn.close()
	print("The most index is "+str(index))
	print("Insert Database success.")

def work_to_database():
	filename = r"E:\download\thunder\Papers\Papers.txt"
	dbname = r"E:\download\thunder\Papers\paper.db"
	createDatabase(dbname)
	insertDatabase(filename, dbname)

if __name__ == '__main__':
	start = time.clock()
	work_to_database()
	end = time.clock()
	print(end-start)

其中因为后面在是在window下跑了一下,出现了不明的字符编码错误,

所以在读取文件的时候是以二进制的方式读取的,然后再用UTF8的方式decode

从数据库中读取数据

# 从数据库中读取数据,画图
# 2016年07月13日00:30:13
# author lee
# Linux python3

import os
import time
import sqlite3
from collections import Counter

def getPaintData(filename, dbname):
	if os.path.exists(filename):
		os.remove(filename)
	conn = sqlite3.connect(dbname)
	cursor = conn.cursor()

	f = open(filename, "a")
	sql_select_year_before = "select index_id, Paper_Publish_Year from paper order by index_id asc limit 1"
	cursor.execute(sql_select_year_before)
	row = cursor.fetchone()
	row_list = list(row)
	f.write(str(row_list[1]))
	f.write(",")
	count_time = 0
	while True:
		count_time = count_time + 1
		sql_select_year_after = "select index_id, Paper_Publish_Year from paper where index_id > " + str(row_list[0]) + " order by index_id asc limit 1"
		cursor.execute(sql_select_year_after)
		row = cursor.fetchone()
		if row == None:
			break
		if count_time > 126909021:
			break
		row_list = list(row)
		f.write(str(row_list[1]))
		f.write(",")

	f.close()
	conn.commit()
	cursor.close()
	conn.close()
	print("getPaintData Success!")

def getCountPaintData(filename, filename_count):
	if os.path.exists(filename_count):
		os.remove(filename_count)
	f = open(filename, "r")
	line = f.readline().strip("\n").split(",")
	line = line[:-1]
	myset = set(line)
	elementCounter = Counter(line)

	f_count = open(filename_count, "a")
	for item in myset:
		f_count.write(str(item))
		f_count.write(",")
		f_count.write(str(elementCounter[item]))
		f_count.write("\n")
	f_count.close()
	f.close()
	print("getCountPaintData Success!")

def getSortPaintData(filename_count, filename_sort):
	if os.path.exists(filename_sort):
		os.remove(filename_sort)

	f_count = open(filename_count, "r")
	f_sort = open(filename_sort, "a")
	sort_list = []
	line = f_count.readline()
	while line:
		line = line.strip("\n").split(",")
		a = line[0]
		b = line[1]
		temp_tuple = (a, b)
		sort_list.append(temp_tuple)
		line = f_count.readline()

	sort_list = sorted(sort_list, key=lambda allref_tuple:allref_tuple[0])
	for item in sort_list:
		f_sort.write(str(item[0]))
		f_sort.write(",")
		f_sort.write(str(item[1]))
		f_sort.write("\n")
	f_sort.close()
	f_count.close()
	print("getSortPaintData Success!")

def paint():
	filename = "./src/year_set.txt"
	dbname = "./src/paper.db"
	filename_count = "./src/year_count.txt"
	filename_sort = "./src/year_count_sort.txt"
	getPaintData(filename, dbname)
	getCountPaintData(filename, filename_count)
	getSortPaintData(filename_count, filename_sort)

if __name__ == '__main__':
	paint()

这里执行查询的时候,其实是对整个数据表进行查询,这个数据表特别大,

如果使用fetchall的方式查询的话,消耗的时间特别多,所以用fetchone的方式查询。

使用fetchone方式进行查询,按照id一行一行得往下查,使用limit限制查询数量

 

 

转载请注明:Imekaku-Blog » Python对Sqlite3数据库操作

喜欢 (0)or分享 (0)
发表我的评论
取消评论

表情

Hi,您需要填写昵称和邮箱!

  • 昵称 (必填)
  • 邮箱 (必填)
  • 网址