数据库中每条记录都有一个名为 year 的字段。现在需要统计每个 year 值出现的次数,并画出直方图。
代码
# Collect every "year" value stored in the database, count how often each
# one occurs, and draw the distribution as a bar chart.
#
# The work is split into four steps that hand data to each other through
# small text files in the current directory:
#   get_year_set    -> year_set.txt         "y1,y2,...,yn,"
#   get_year_count  -> year_count.txt       one "year,count" line per year
#   sort_year_count -> year_count_sort.txt  same lines, ordered by year
#   get_graph       -> year_count.png
import os
import sqlite3
from collections import Counter
from contextlib import closing

DB_NAME = "../allmessage.db"
YEAR_SET_FILE = "year_set.txt"
YEAR_COUNT_FILE = "year_count.txt"
YEAR_COUNT_SORTED_FILE = "year_count_sort.txt"
GRAPH_FILE = "year_count.png"


def get_year_set(dbname=DB_NAME, filename=YEAR_SET_FILE):
    """Dump the ``year`` column of ``table1`` into a comma-separated file.

    Every value is written followed by a comma, so the file always ends
    with a trailing comma (``get_year_count`` relies on that).  A NULL year
    is written as an empty field rather than the text ``None`` so that the
    later steps treat it like any other missing year.

    Args:
        dbname: path of the SQLite database to read.
        filename: path of the text file to (re)create.

    Returns:
        None on success, -1 if the database file does not exist.
    """
    if not os.path.exists(dbname):
        print("The database is not exist!")
        return -1

    # Run the query before touching the output file, so a failing query
    # cannot leave an empty or half-written file behind.
    with closing(sqlite3.connect(dbname)) as conn:
        with closing(conn.cursor()) as cursor:
            cursor.execute("select year from table1")
            rows = cursor.fetchall()

    years = ["" if row[0] is None else str(row[0]) for row in rows]

    if os.path.exists(filename):
        print("The "+filename+" is exist, and the programming is removing!")
    # Mode "w" discards any previous content.
    with open(filename, "w") as f:
        f.write("".join(year + "," for year in years))


def get_year_count(filename=YEAR_SET_FILE, filename_write=YEAR_COUNT_FILE):
    """Count the occurrences of each year listed in ``filename``.

    Reads the first line of the comma-separated file produced by
    ``get_year_set`` and writes one ``year,count`` line per distinct year,
    in order of first appearance.

    Args:
        filename: path of the comma-separated input file.
        filename_write: path of the count file to (re)create.

    Returns:
        None on success, -1 if the input file does not exist.
    """
    if not os.path.exists(filename):
        print("The "+filename+" is not exists")
        return -1

    with open(filename, "r") as f:
        fields = f.readline().strip("\n").split(",")
    # Each value is followed by a comma, so the piece after the final comma
    # is an empty remainder and not a real year.
    counts = Counter(fields[:-1])

    with open(filename_write, "w") as f_write:
        for year, count in counts.items():
            f_write.write(year + "," + str(count) + "\n")


def _year_sort_key(year):
    """Order numeric years by value; non-numeric text goes last, by text."""
    try:
        return (0, int(year), "")
    except ValueError:
        return (1, 0, year)


def sort_year_count(filename_read=YEAR_COUNT_FILE,
                    filename_write=YEAR_COUNT_SORTED_FILE):
    """Rewrite the ``year,count`` lines ordered by year.

    An empty year is written as ``0``.  Years are compared as numbers, not
    as text, so that e.g. 850 comes before 1999.

    Args:
        filename_read: path of the unsorted ``year,count`` file.
        filename_write: path of the sorted file to (re)create.

    Returns:
        None on success, -1 if the input file does not exist.
    """
    if not os.path.exists(filename_read):
        print("The file is not exists!")
        return -1

    pairs = []
    with open(filename_read, "r") as f_read:
        for line in f_read:
            if not line.strip():
                continue  # tolerate blank lines
            fields = line.strip("\n").split(",")
            year = fields[0] if fields[0] != '' else '0'
            pairs.append((year, fields[1]))

    pairs.sort(key=lambda pair: _year_sort_key(pair[0]))

    with open(filename_write, "w") as f_write:
        for year, count in pairs:
            f_write.write(year + "," + count + "\n")


def _read_year_counts(filename):
    """Parse ``year,count`` lines into two parallel lists of integers.

    Lines that do not hold two integers cannot be placed on the chart;
    they are reported and skipped.
    """
    years = []
    counts = []
    with open(filename, "r") as f:
        for line in f:
            fields = line.strip("\n").split(",")
            try:
                year = int(fields[0])
                count = int(fields[1])
            except (ValueError, IndexError):
                print("Skipping unplottable line: " + line.strip("\n"))
                continue
            years.append(year)
            counts.append(count)
    return years, counts


def get_graph(filename=YEAR_COUNT_SORTED_FILE, output=GRAPH_FILE,
              xlim=(1935, 2015), ymax=200761):
    """Draw the year/count bar chart and save it as a PNG.

    Args:
        filename: path of the sorted ``year,count`` file.
        output: path of the image to write.
        xlim: (first, last) year shown on the x axis.
        ymax: upper limit of the (log-scaled) y axis.

    Returns:
        None on success, -1 if the input file does not exist.
    """
    if not os.path.exists(filename):
        print("The filename is not exists!")
        return -1

    # Imported here so the counting steps above also run on machines
    # without matplotlib.
    import matplotlib.pyplot as plt

    x, y = _read_year_counts(filename)

    plt.bar(x, y, alpha=.5, log=True, color='g')  # counts on a log scale
    plt.xlabel("Year")
    plt.ylabel("Count")
    plt.xlim(xlim[0], xlim[1])
    # A log-scaled axis has no valid lower limit of 0, so keep the lower
    # bound matplotlib picked and only pin the upper one.
    bottom, _ = plt.ylim()
    plt.ylim(bottom, ymax)
    plt.title("Year-Count")
    plt.savefig(output, format="png")


def main():
    """Run the four steps in order, stopping at the first one that fails.

    Returns:
        0 if every step succeeded, 1 otherwise.
    """
    for step in (get_year_set, get_year_count, sort_year_count, get_graph):
        if step() == -1:
            return 1
    return 0


if __name__ == '__main__':
    raise SystemExit(main())
效果图: