爬取哔哩哔哩舞蹈区

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
import requests
import time
from urllib.parse import urlencode
import pymysql


# HTTP headers sent with every API request: a browser session cookie plus a
# desktop Chrome user-agent string.
# NOTE(review): the cookie is a hard-coded session value — presumably it
# expires and should not live in source control; confirm before reuse.
headers = {
    "cookie": "_uuid=foc; buvid3=41B28D1E-E371-4F67-8686-8E0EB99F8B481infoc; sid=5ooa96%2C2bc8c*a1; bili_jct=fc45930f40013b35b8f9d8a522fe7dd8; CURRENT_FNVAL=80; blackside_state=1; rpdid=|(u|JRRkuJu)0J'uY|Ruu~kRR; LIVE_BUVID=AUTO8516042225840034; CURRENT_QUALITY=80; PVID=1; fingerprint3=ead95e7-8686-8E0EB99F8B48138400infoc; buvid_fp_plain=41B28D1E-E371-4F67-8686-8E0EB99F8B48138400infoc; fingerprint_s=80495dfa66482f9ac8836a3ced34f489; bp_video_of=476757016423436327; bfe_id=1bad38f44e358ca77469025e0405c4a6",
    "user-agent": "Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/87.0.4280.88 Safari/537.36 FS",
}

# Module-level counter updated by getConn (via ``global count``) and shown in
# its progress message.
count = 0
def getConn(l):
    """Insert one scraped video record into the ``bilibili`` table.

    Opens a connection to the local ``spider`` database, executes a single
    parameterized INSERT, commits, and always closes the connection again.
    On success the module-level ``count`` is incremented and a progress
    line is printed.

    Args:
        l: Sequence of the 13 values bound, in order, to the INSERT's
            ``%s`` placeholders.

    Raises:
        Any exception raised by PyMySQL while connecting, executing or
        committing propagates to the caller; ``count`` is left unchanged
        in that case.
    """
    global count
    # Keyword arguments are required: PyMySQL 1.0+ rejects positional
    # host/user/password/database arguments.
    db = pymysql.connect(
        host='localhost',
        user='root',
        password='123456',
        database='spider',
    )
    sql = 'insert into bilibili values(%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s);'
    try:
        with db.cursor() as cursor:
            cursor.execute(sql, l)
        db.commit()
    finally:
        # Release the connection even when the insert fails; otherwise one
        # connection would be leaked per record.
        db.close()
    # Count only rows that were actually committed, so the "success"
    # message numbering is not inflated by failed inserts.
    count += 1
    print(f"第{count}条数据插入成功~~~")
def getData(page):
    """Fetch one page of the "newlist" API and store every returned video.

    Requests page ``page`` of
    ``https://api.bilibili.com/x/web-interface/newlist`` with the module's
    ``headers``, then passes one 13-value record per archive entry to
    ``getConn``. A record that fails to insert is reported and skipped so
    the rest of the page is still processed.

    Args:
        page: Page number sent as the ``pn`` query parameter.

    Raises:
        requests.RequestException: on network failure, timeout, or an HTTP
            error status.
        ValueError: if the response body is not valid JSON.
    """
    param = {
        'rid': '20',      # presumably the dance-section id (per the file title) — confirm
        'type': '0',
        'pn': page,
        'ps': '20',       # presumably the page size — confirm
        'jsonp': 'jsonp',
    }
    res = requests.get(
        'https://api.bilibili.com/x/web-interface/newlist',
        params=param,
        headers=headers,
        timeout=10,  # never hang forever on a stalled connection
    )
    res.raise_for_status()

    # Tolerate a missing or null "data"/"archives" member instead of
    # crashing with KeyError/TypeError.
    payload = res.json().get('data') or {}
    for i in payload.get('archives') or []:
        stat = i['stat']
        pubdate = time.strftime("%Y-%m-%d %H:%M:%S", time.localtime(i["pubdate"]))
        # Values are listed in the order of the INSERT's placeholders.
        record = [
            i["bvid"],
            i["title"],
            pubdate,
            i["desc"],
            i["owner"]["name"],  # plain string; no need to wrap it in a list
            stat["coin"],
            stat["danmaku"],
            stat["dislike"],
            stat["favorite"],
            stat["like"],
            stat["reply"],
            stat["share"],
            stat["view"],
        ]
        try:
            getConn(record)
        except Exception as exc:
            # Best effort: report the cause and continue with the next
            # record. KeyboardInterrupt/SystemExit are no longer swallowed.
            print(f"插入失败: {exc}")

if __name__ == '__main__':
    # Script entry point: crawl pages 1 through 10475 in ascending order,
    # one getData call per page.
    first_page, stop_page = 1, 10476
    page_no = first_page
    while page_no < stop_page:
        getData(page_no)
        page_no += 1
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
/*
Navicat MySQL Data Transfer

Source Server : localhost
Source Server Version : 50721
Source Host : localhost:3306
Source Database : spider

Target Server Type : MYSQL
Target Server Version : 50721
File Encoding : 65001

Date: 2021-01-07 20:50:10
*/

SET FOREIGN_KEY_CHECKS=0;

-- ----------------------------
-- Table structure for bilibili
-- ----------------------------
-- Recreate the table that receives one row per scraped video.
-- Column ORDER matters: the scraper inserts 13 positional values
-- (bv, title, pubdate, desc, up, coin, danmaku, dislike, favorite, like,
-- reply, share, view) without naming the columns.
DROP TABLE IF EXISTS `bilibili`;
CREATE TABLE `bilibili` (
  `bv` varchar(255) NOT NULL,
  `title` varchar(255) DEFAULT NULL,
  `pubdate` datetime DEFAULT NULL,
  -- Free-text description; TEXT avoids the 255-character cap
  -- (presumably descriptions can be longer — confirm against real data).
  `desc` text,
  `up` varchar(255) DEFAULT NULL,
  -- Counters arrive as integers from the scraper, so store them
  -- numerically to allow sorting and aggregation.
  `coin` bigint(20) DEFAULT NULL,
  `danmaku` bigint(20) DEFAULT NULL,
  `dislike` bigint(20) DEFAULT NULL,
  `favorite` bigint(20) DEFAULT NULL,
  `like` bigint(20) DEFAULT NULL,
  `reply` bigint(20) DEFAULT NULL,
  -- Was misspelled `shart`; holds the share count.
  `share` bigint(20) DEFAULT NULL,
  `view` bigint(20) DEFAULT NULL,
  PRIMARY KEY (`bv`)
  -- utf8mb4 so 4-byte characters (e.g. emoji) in titles/descriptions can
  -- be stored. NOTE(review): a 255-char utf8mb4 primary key needs the
  -- large index prefix (default on the 5.7.21 server this dump targets).
) ENGINE=InnoDB DEFAULT CHARSET=utf8mb4;

sm0pid.png