大小:未知
安全无毒免费软件无广告
1
|
pip install requests lxml xlrd xlwt xlutils |
01
02
03
04
05
06
07
08
09
10
|
# Excerpt: province codes to crawl and, for each one, how many table rows
# the crawler reads from that province's page.
alis = ['sh', 'bj', 'zj', 'gd']
for city in alis:
    if city == 'sh':
        num = 39
    elif city == 'bj':
        num = 69
    elif city == 'zj':
        num = 59
    elif city == 'gd':
        num = 67
001
002
003
004
005
006
007
008
009
010
011
012
013
014
015
016
017
018
019
020
021
022
023
024
025
026
027
028
029
030
031
032
033
034
035
036
037
038
039
040
041
042
043
044
045
046
047
048
049
050
051
052
053
054
055
056
057
058
059
060
061
062
063
064
065
066
067
068
069
070
071
072
073
074
075
076
077
078
079
080
081
082
083
084
085
086
087
088
089
090
091
092
093
094
095
096
097
098
099
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
|
"""Collect school names from daxue.eol.cn and store their score lines in an .xls file.

Running the module as a script writes a header row to ``University.xls``,
fetches the school list with :func:`getSchool`, and calls :func:`Query`
once per school to append the rows returned by the Baidu endpoint.
"""
import sys

import requests
import xlrd
import xlwt
from lxml import etree
from xlutils.copy import copy

book_name_xls = 'University.xls'
sheet_name_xls = 'University'

# Province and subject track sent with every score query. They are defined at
# import time so that Query() also works when this module is imported.
myCity = '江西'
mySubject = '理科'

# Seconds to wait for an HTTP response; without a timeout requests can block
# indefinitely on a stalled connection.
REQUEST_TIMEOUT = 10

# (page code, number of table rows to read) for each province page.
_CITY_PAGES = (('sh', 39), ('bj', 69), ('zj', 59), ('gd', 67))

# (page code, number of table rows to read, index of the <div> holding the
# table) for each project page.
_PROJECT_PAGES = (('211', 116, 3), ('985', 39, 2))

_USER_AGENT = (
    'Mozilla/5.0 (Windows NT 10.0; Win64; x64) '
    'AppleWebKit/537.36 (KHTML, like Gecko) '
    'Chrome/75.0.3770.90 Safari/537.36'
)


def write_excel_xls(path, sheet_name, value):
    """Create a new workbook at *path* containing one sheet filled from *value*.

    Args:
        path: File name the workbook is saved to.
        sheet_name: Name of the single sheet that is created.
        value: Sequence of rows; each row is a sequence of cell values.
    """
    workbook = xlwt.Workbook()
    sheet = workbook.add_sheet(sheet_name)
    for row_index, row in enumerate(value):
        for col_index, cell in enumerate(row):
            sheet.write(row_index, col_index, cell)
    workbook.save(path)
    print("xls格式表格写入数据成功!")


def write_excel_xls_append(path, value):
    """Append the rows in *value* below the existing rows of the first sheet.

    Args:
        path: File name of an existing workbook; it is overwritten in place.
        value: Sequence of rows; each row is a sequence of cell values.
    """
    workbook = xlrd.open_workbook(path)
    first_sheet = workbook.sheet_by_name(workbook.sheet_names()[0])
    rows_old = first_sheet.nrows  # new rows start right after the existing ones
    # xlrd workbooks are read-only; xlutils.copy turns one into a writable
    # xlwt workbook.
    new_workbook = copy(workbook)
    new_worksheet = new_workbook.get_sheet(0)
    for row_offset, row in enumerate(value):
        for col_index, cell in enumerate(row):
            new_worksheet.write(rows_old + row_offset, col_index, cell)
    new_workbook.save(path)
    print("xls格式表格【追加】写入数据成功!")


def read_excel_xls(path):
    """Print every cell of the first sheet of the workbook at *path*, one line per row."""
    workbook = xlrd.open_workbook(path)
    worksheet = workbook.sheet_by_name(workbook.sheet_names()[0])
    for row_index in range(worksheet.nrows):
        for col_index in range(worksheet.ncols):
            print(worksheet.cell_value(row_index, col_index), "\t", end="")
        print()


def getSchool():
    """Return the text of the second table cell of each listed row on the province pages.

    For every entry in ``_CITY_PAGES`` the page ``http://daxue.eol.cn/<code>.shtml``
    is fetched and rows 3 .. num + 2 of the table are read.

    Returns:
        A new list of strings in page order. Cells without text are skipped.

    Raises:
        requests.RequestException: If a page cannot be fetched in time.
    """
    found = []
    for city, num in _CITY_PAGES:
        r = requests.get(f'http://daxue.eol.cn/{city}.shtml', timeout=REQUEST_TIMEOUT)
        r.encoding = 'utf-8'
        page = etree.HTML(r.text)
        if page is None:  # nothing parseable came back for this page
            continue
        for i in range(num):
            cells = page.xpath(
                f'/html/body/div[3]/div[2]/div[2]/table/tbody/tr[{i+3}]/td[2]'
            )
            found.extend(cell.text for cell in cells if cell.text)
    return found


def getProjectSchool():
    """Return the link texts from the first two table columns of the 211 and 985 pages.

    For every entry in ``_PROJECT_PAGES`` the page
    ``https://daxue.eol.cn/<code>.shtml`` is fetched; for each of the first
    ``num`` rows the ``<a>`` texts of cell 1 are collected, then those of cell 2.

    Returns:
        A new list of strings in page order. Links without text are skipped.

    Raises:
        requests.RequestException: If a page cannot be fetched in time.
    """
    found = []
    for project, num, x in _PROJECT_PAGES:
        r = requests.get(f'https://daxue.eol.cn/{project}.shtml', timeout=REQUEST_TIMEOUT)
        r.encoding = 'utf-8'
        page = etree.HTML(r.text)
        if page is None:  # nothing parseable came back for this page
            continue
        for i in range(num):
            for column in (1, 2):
                links = page.xpath(
                    f'/html/body/div[3]/div[2]/div[{x}]/table/tbody/tr[{i+1}]/td[{column}]/a'
                )
                found.extend(link.text for link in links if link.text)
    return found


def _score_rows(payload, school):
    """Turn the decoded JSON *payload* into rows of five cell texts plus *school*."""
    return [
        [entry['col'][k]['info'][0]['text'] for k in range(5)] + [school]
        for entry in payload['data'][0]['tr']
    ]


def Query(school, city=None, subject=None):
    """Fetch the score rows for *school* and append them to the workbook.

    All rows returned for the school are appended to ``book_name_xls`` in a
    single write, after which the whole sheet is printed.

    Args:
        school: School name sent as the ``query`` parameter.
        city: Value for ``select1``; defaults to the module-level ``myCity``.
        subject: Value for ``select2``; defaults to the module-level ``mySubject``.

    Raises:
        requests.RequestException: If the request fails or times out.
        ValueError: If the response body is not valid JSON.
        KeyError, IndexError, TypeError: If the JSON lacks the expected fields.
    """
    city = myCity if city is None else city
    subject = mySubject if subject is None else subject
    selector = f'select1={city},select2={subject}'
    url = 'https://sp0.baidu.com/8aQDcjqpAAV3otqbppnN2DJv/api.php'
    headers = {'User-Agent': _USER_AGENT}
    params = {
        'resource_id': '34559',
        'query': school,
        'co': f'tr[{selector}]|th[{selector}]',
        'format': 'json',
        'oe': 'utf-8',
        'ie': 'utf-8',
        '_': '1561384786050',
    }
    payload = requests.get(
        url, params=params, headers=headers, timeout=REQUEST_TIMEOUT
    ).json()
    rows = _score_rows(payload, school)
    if rows:
        # One append per school: each append re-opens, copies and re-saves
        # the workbook, so writing row by row repeats that work needlessly.
        write_excel_xls_append(book_name_xls, rows)
        read_excel_xls(book_name_xls)


def main():
    """Write the header row, then query every school returned by getSchool()."""
    value_title = [["年份", "平均分", "最低分", "省控线", "批次", "学校"],]
    write_excel_xls(book_name_xls, sheet_name_xls, value_title)
    for school in getSchool():
        try:
            Query(school)
        except Exception as exc:
            # Best effort: one failing school must not stop the run. Unlike a
            # bare except, this still lets Ctrl-C and SystemExit through.
            print(f'skipped {school!r}: {exc!r}', file=sys.stderr)


if __name__ == '__main__':
    main()
展开
友情提醒:请点击右上角的微信菜单选择使用浏览器打开下载(因为微信中不提供下载功能),点击任意处可关闭该提示信息,谢谢~