python matplotlib 年代-年代count-直方图

数据库中每条记录都有一个名为 year 的字段。现在需要统计每个 year 值出现的次数,并据此画出直方图。

代码

# 获取数据库中所有的year,构成一个集合
import os
import sqlite3
from collections import Counter
import matplotlib.pyplot as plt

# Dump the year of every article in the database to a text file.
def get_year_set():
	"""Export the ``year`` column of ``table1`` to ``year_set.txt``.

	Reads the ``year`` value of every row in the SQLite database
	``../allmessage.db`` and writes the values to ``year_set.txt`` on a
	single line, each value followed by a comma (so a non-empty line ends
	with a trailing comma). An existing ``year_set.txt`` is replaced.

	Returns:
		-1 if the database file does not exist, otherwise None.
	"""
	dbname = "../allmessage.db"
	# Checked up front because sqlite3.connect() creates a missing database file.
	if not os.path.exists(dbname):
		print("The database is not exist!")
		return -1
	filename = "year_set.txt"
	if os.path.exists(filename):
		print("The "+filename+" is exist, and the programming is removing!")
		os.remove(filename)

	# try/finally guarantees the cursor and connection are released even when
	# the query fails (for example, if table1 is missing).
	conn = sqlite3.connect(dbname)
	try:
		cursor = conn.cursor()
		try:
			cursor.execute("select year from table1")
			rows = cursor.fetchall()
		finally:
			cursor.close()
	finally:
		conn.close()

	# The query is read-only, so there is nothing to commit.
	with open(filename, "w") as f:
		f.write("".join(str(row[0]) + "," for row in rows))

# Count how often each year occurs and write the tallies to a file.
def get_year_count():
	"""Tally the values in ``year_set.txt`` into ``year_count.txt``.

	Only the first line of ``year_set.txt`` is read; it is split on commas
	and the last field (the leftover after the trailing comma) is dropped.
	One ``value,count`` line is written per distinct value, in order of
	first appearance (Counter keeps insertion order on Python 3.7+). An
	existing ``year_count.txt`` is replaced.

	Returns:
		-1 if ``year_set.txt`` does not exist, otherwise None.
	"""
	filename = "year_set.txt"
	if not os.path.exists(filename):
		print("The "+filename+" is not exists")
		return -1
	with open(filename, "r") as f:
		fields = f.readline().strip("\n").split(",")

	# Every value is followed by a comma, so the final split field is an
	# empty leftover rather than a real value.
	year_counts = Counter(fields[:-1])

	filename_write = "year_count.txt"
	with open(filename_write, "w") as f_write:
		f_write.writelines(
			"{},{}\n".format(year, count) for year, count in year_counts.items()
		)

# Sort the year/count pairs by year, in ascending numeric order.
def sort_year_count():
	"""Sort ``year_count.txt`` by year and write ``year_count_sort.txt``.

	Each input line is split on commas; the first field is the year and the
	second is its count. An empty year is written out as ``0``. Years are
	ordered by integer value, so ``999`` comes before ``1000``; years that
	are not integers are placed after all numeric ones. Lines with fewer
	than two fields (such as blank lines) are skipped. An existing
	``year_count_sort.txt`` is replaced.

	Returns:
		-1 if ``year_count.txt`` does not exist, otherwise None.
	"""
	filename_read = "year_count.txt"
	if not os.path.exists(filename_read):
		print("The file is not exists!")
		return -1

	pairs = []
	with open(filename_read, "r") as f_read:
		for line in f_read:
			fields = line.strip("\n").split(",")
			if len(fields) < 2:
				# Blank or malformed line: there is no count to carry over.
				continue
			# An empty year becomes "0" so the output line starts with a number.
			pairs.append((fields[0] or "0", fields[1]))

	pairs.sort(key=_year_sort_key)

	filename_write = "year_count_sort.txt"
	with open(filename_write, "w") as f_write:
		f_write.writelines("{},{}\n".format(year, count) for year, count in pairs)


def _year_sort_key(pair):
	"""Return a sort key that orders numeric years by value, others last."""
	try:
		return (0, int(pair[0]), "")
	except ValueError:
		# Non-numeric years keep a stable, text-ordered position at the end.
		return (1, 0, pair[0])

# Draw the year/count bar chart.
def get_graph():
	"""Plot the counts in ``year_count_sort.txt`` and save ``year_count.png``.

	Each input line is split on commas; the first field is parsed as the
	integer year (x value) and the second as the integer count (bar height).
	Lines with fewer than two fields (such as blank lines) are skipped. The
	bars are drawn with a logarithmic count axis and the figure is saved as
	a PNG in the current directory.

	Returns:
		-1 if ``year_count_sort.txt`` does not exist, otherwise None.

	Raises:
		ValueError: if a year or count field is not an integer.
	"""
	filename = "year_count_sort.txt"
	if not os.path.exists(filename):
		print("The filename is not exists!")
		return -1
	x = []
	y = []
	# The with-block closes the file even if int() rejects a field.
	with open(filename, "r") as f:
		for line in f:
			fields = line.strip("\n").split(",")
			if len(fields) < 2:
				continue
			x.append(int(fields[0]))
			y.append(int(fields[1]))
	plt.bar(x, y, alpha = .5, log=True, color = 'g') # show counts on a log scale
	plt.xlabel("Year")
	plt.ylabel("Count")
	# The axis ranges are hard-coded.
	# NOTE(review): presumably chosen for the author's data set -- confirm.
	plt.xlim(1935, 2015)
	# NOTE(review): a lower limit of 0 cannot be represented on a log axis;
	# confirm how matplotlib treats it before relying on this range.
	plt.ylim(0, 200761)
	plt.title("Year-Count")
	plt.savefig("year_count.png", format="png")

if __name__ == '__main__':
	# Each stage reads the file written by the previous one, so stop at the
	# first stage that reports failure (-1) instead of carrying on with
	# missing or stale intermediate files.
	for stage in (get_year_set, get_year_count, sort_year_count, get_graph):
		if stage() == -1:
			break

效果图:

year_count

 

 

 

发表回复

您的电子邮箱地址不会被公开。 必填项已用*标注

开始在上面输入您的搜索词,然后按回车进行搜索。按ESC取消。

返回顶部