# 爬取vmgirls美女图片 (scrape gallery pictures from vmgirls.com)
import os
import re
from hashlib import md5

import requests
from lxml import etree

def get_page(page):
    """Fetch one page of the post listing and return the post links.

    Sends a POST to the site's ``admin-ajax.php`` endpoint with the
    ``ajax_load_posts`` action and collects the ``href`` of every ``<a>``
    that is a direct child of a ``<div class="list-body">``.

    Args:
        page: Value sent as the ``paged`` form field.

    Returns:
        A list of href strings matched by the XPath (empty when nothing
        matches or the response body cannot be parsed).

    Raises:
        requests.exceptions.RequestException: If the request fails or
            times out.
    """
    data = {
        "append": "list-archive",
        "paged": page,
        "action": "ajax_load_posts",
        "query": "25",
        "page": "cat",
    }

    headers = {
        "User-Agent": "Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/83.0.4103.106 Safari/537.36",
        "cookie": "_ga=GA1.2.879115049.1597016349; _gid=GA1.2.29282622.1597016349; Hm_lvt_a5eba7a40c339f057e1c5b5ac4ab4cc9=1597016349; _GPSLSC=; verynginx_sign_cookie=39154348106419081c88a4fccdff4f0a; verynginx_sign_javascript=ebf13703c41835d486309e30ef152c1e; Hm_lpvt_a5eba7a40c339f057e1c5b5ac4ab4cc9=1597045709; _gat_gtag_UA_127463675_2=1",
    }

    # Named ``response`` (not ``re``) so the imported regex module is not
    # shadowed. The (connect, read) timeout matches the one download() uses,
    # so a stalled server cannot hang the crawl forever.
    response = requests.post(
        "https://www.vmgirls.com/wp-admin/admin-ajax.php",
        data=data,
        headers=headers,
        timeout=(3, 7),
    )

    html = etree.HTML(response.text)
    # etree.HTML can yield None for an empty body; treat that as "no links"
    # instead of failing on None.xpath.
    if html is None:
        return []

    return html.xpath("//div[@class='list-body']/a/@href")

def get_detials(res):
    """Fetch every post page and return the absolute image URLs found.

    Each page in ``res`` is downloaded and scanned with a regex for
    ``<a href="..." alt="..." title="...">`` tags; every captured href is
    prefixed with ``https://www.vmgirls.com/``.

    Args:
        res: Iterable of post page URLs (as returned by ``get_page``).

    Returns:
        A flat list of URL strings, in page order and then match order.

    Raises:
        requests.exceptions.RequestException: If a request fails or
            times out.
    """
    headers = {
        "User-Agent": "Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/83.0.4103.106 Safari/537.36",
        "cookie": "_ga=GA1.2.879115049.1597016349; _gid=GA1.2.29282622.1597016349; Hm_lvt_a5eba7a40c339f057e1c5b5ac4ab4cc9=1597016349; _GPSLSC=; verynginx_sign_cookie=39154348106419081c88a4fccdff4f0a; verynginx_sign_javascript=ebf13703c41835d486309e30ef152c1e; Hm_lpvt_a5eba7a40c339f057e1c5b5ac4ab4cc9=1597046540; _gat_gtag_UA_127463675_2=1",
    }

    urls = []
    for page_url in res:
        # Same (connect, read) timeout as download(), so one unresponsive
        # page cannot block the whole crawl indefinitely.
        result = requests.get(page_url, headers=headers, timeout=(3, 7))
        hrefs = re.findall('<a href="(.*?)" alt=".*?" title=".*?">', result.text)
        # Append directly to the flat list; no intermediate list-of-lists.
        urls.extend("https://www.vmgirls.com/" + href for href in hrefs)
    return urls

def download(url):
    """Download every URL and save the response body under ``./imgs``.

    Each file is named ``<md5 hex digest of the content>.jpeg``, so identical
    content is written to the same path. The response status is not
    checked; whatever body is returned gets written.

    Args:
        url: Iterable of image URLs to fetch.

    Raises:
        requests.exceptions.RequestException: If a request fails or
            times out.
        OSError: If the output directory or a file cannot be written.
    """
    headers = {
        "User-Agent": "Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/83.0.4103.106 Safari/537.36",
        "cookie": "__ga=GA1.2.879115049.1597016349; _gid=GA1.2.29282622.1597016349; Hm_lvt_a5eba7a40c339f057e1c5b5ac4ab4cc9=1597016349; _GPSLSC=; verynginx_sign_cookie=39154348106419081c88a4fccdff4f0a; verynginx_sign_javascript=ebf13703c41835d486309e30ef152c1e; Hm_lpvt_a5eba7a40c339f057e1c5b5ac4ab4cc9=1597045839",
    }

    # open(..., "wb") does not create missing parent directories, so make
    # sure the target folder exists before the first write.
    os.makedirs("./imgs", exist_ok=True)

    for image_url in url:
        img = requests.get(image_url, headers=headers, timeout=(3, 7))
        # Hash once and reuse it for both the file name and the log line.
        digest = md5(img.content).hexdigest()
        with open(f"./imgs/{digest}.jpeg", "wb") as f:
            f.write(img.content)
        print(f"下载图片:{digest}.jpeg")

def main():
    """Crawl listing pages 1 through 44 and download every image found.

    For each page number: announce it, fetch the post links, resolve them
    to image URLs, and download those images.
    """
    page_number = 1
    while page_number < 45:
        print(f"正在爬取第{page_number}页~~~")
        post_links = get_page(page_number)
        image_urls = get_detials(post_links)
        download(image_urls)
        page_number += 1

# Start the crawl only when this file is executed as a script, so that
# importing the module does not trigger network requests.
if __name__ == "__main__":
    main()