Scraping a Website's Wallpapers with a Crawler + Using the Requests Library + Downloading Images

[toc]

Site URL: wallpaper link

Complete code

import os
import requests

start = 1   # starting offset
count = 12  # 12 images per page
header = {
    'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.124 Safari/537.36'
}

def getInterface(url):
    # request the JSON interface and collect one image link per line
    r = requests.get(url, headers=header, timeout=10)
    r.encoding = 'utf-8'
    data = r.json()
    str2 = ""
    for item in data['data']:
        str2 += str(item['img_1600_900']) + "\n"
    return str2

def download():
    savepath = "./图片/"  # save directory
    os.makedirs(savepath, exist_ok=True)  # create it if it does not exist yet
    with open("./data2.txt", "r") as f:
        data = f.read().split("\n")
    for item, link in enumerate(data):
        if not link:  # skip the trailing empty line left by split()
            continue
        cont = requests.get(url=link, headers=header)
        with open(savepath + str(item) + ".jpg", "wb") as img:
            img.write(cont.content)

if __name__ == '__main__':
    f = open("./data2.txt", "w+")
    for i in range(0, 2):
        url = 'http://wallpaper.apc.360.cn/index.php?c=WallPaper&start={0}&count={1}&from=360chrome&a=getAppsByCategory&cid=26'.format(
            start, count)
        print(url)
        str2 = getInterface(url)
        f.write(str2)
        start += count
        print(str(start) + '->')
    print("finished writing links")
    f.close()
    download()




A few supplementary notes on the Requests library

import requests

# Put a User-Agent in the request headers to mimic a browser
headers = {
    "Accept": "text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,image/apng,*/*;q=0.8,application/signed-exchange;v=b3;q=0.9",
    'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.124 Safari/537.36'
}
# r = requests.get('http://wallpaper.apc.360.cn/index.php?c=WallPaper&start=18&count=15&from=360chrome&a=getAppsByCategory&cid=26', headers=headers)
# print(r.status_code)
# r.encoding = 'utf-8'  # decode the page as utf-8
# print(r.encoding)
# print(r.text)    # page content
# print(r.json())  # if the page is JSON, this parses it into Python objects
# print(r.headers) # the response headers

# Sending request parameters:
# 1. Append them directly to the URL, for example:

# url = 'https://www.baidu.com/s?wd=python'
# # timeout: give up if there is no response within that many seconds
# r2 = requests.get(url, headers=headers, timeout=5)
# print(r2.text)

# 2. Pass a dict through the params keyword
url2 = 'https://www.baidu.com/s?'
question = {'wd': 'python'}
r3 = requests.get(url2, params=question, headers=headers)
r3.encoding = 'utf-8'
print(r3.text)
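
One thing these snippets skip is error handling: r.json() raises an exception when the body is not valid JSON, and a 4xx/5xx status usually means the request was rejected. A minimal defensive wrapper, as a sketch (the name safe_get_json is my own, not part of Requests):

import requests

def safe_get_json(url, headers=None, timeout=10):
    # Fetch a URL and return the parsed JSON, or None on any failure.
    try:
        r = requests.get(url, headers=headers, timeout=timeout)
        r.raise_for_status()  # raise for 4xx/5xx status codes
        return r.json()       # raises ValueError if the body is not JSON
    except (requests.RequestException, ValueError) as e:
        print("request failed:", e)
        return None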

Getting the source site URL

https://abc.flya.top/img/215

Opening this URL (note: the browser has a JSON viewer plugin installed) shows that the response is in JSON format.

https://abc.flya.top/img/213

Looking at this URL: start is the starting offset, and count is how many items one page returns.

So we can turn these two into variables, and then we can crawl many images at once.

import requests

header = {
    'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.124 Safari/537.36'
}
start = 1   # starting offset
count = 10  # 10 images per page
url = 'http://wallpaper.apc.360.cn/index.php?c=WallPaper&start={0}&count={1}&from=360chrome&a=getAppsByCategory&cid=26'.format(start, count)
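
Before writing the extraction loop, it is worth printing one page of the response to confirm where the links live. A quick inspection sketch, continuing with the url and header defined above and assuming the interface is still reachable with the data / img_1600_900 fields used below:

page = requests.get(url, headers=header, timeout=10).json()
print(list(page.keys()))      # top-level keys; 'data' holds the image list
first = page['data'][0]
print(list(first.keys()))     # per-image fields, including img_1600_900
print(first['img_1600_900'])  # the 1600x900 wallpaper link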


Code to save the image links

import requests

start = 1   # starting offset
count = 10  # 10 images per page

def getInterface(url):
    header = {
        'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.124 Safari/537.36'
    }
    r = requests.get(url, headers=header, timeout=10)
    r.encoding = 'utf-8'
    data = r.json()
    str2 = ""
    for item in data['data']:
        str2 += str(item['img_1600_900']) + "\n"
    return str2

if __name__ == '__main__':
    f = open("./data2.txt", "w+")
    for i in range(0, 2):
        url = 'http://wallpaper.apc.360.cn/index.php?c=WallPaper&start={0}&count={1}&from=360chrome&a=getAppsByCategory&cid=26'.format(
            start, count)
        print(url)
        str2 = getInterface(url)
        f.write(str2)
        start += count
        print(str(start) + '->')
    print("finished writing links")
    f.close()
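
After this runs, data2.txt holds one image URL per line (two pages of 10 links each with these settings); the download step below reads the file back line by line.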




https://abc.flya.top/img/214


Image download

import os

def download():
    savepath = "./图片/"  # save directory
    os.makedirs(savepath, exist_ok=True)  # create it if it does not exist yet
    with open("./data2.txt", "r") as f:
        data = f.read().split("\n")
    for item, link in enumerate(data):
        if not link:  # skip the trailing empty line left by split()
            continue
        # header is the User-Agent dict defined earlier
        cont = requests.get(url=link, headers=header)
        with open(savepath + str(item) + ".jpg", "wb") as img:
            img.write(cont.content)
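
Reading cont.content pulls the whole image into memory at once, which is fine for wallpapers but scales poorly for bigger files. As a sketch, the same download can be streamed in chunks with Requests' stream=True and iter_content (download_stream is a hypothetical helper name):

import os
import requests

def download_stream(link, dest, headers=None):
    # Download one file to dest, writing it in 8 KB chunks.
    os.makedirs(os.path.dirname(dest), exist_ok=True)
    with requests.get(link, headers=headers, stream=True, timeout=10) as r:
        r.raise_for_status()
        with open(dest, "wb") as f:
            for chunk in r.iter_content(chunk_size=8192):
                f.write(chunk)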

Results

https://abc.flya.top/img/216

https://abc.flya.top/img/212