from urllib.parse import urljoin

import requests
from bs4 import BeautifulSoup
# Fetch the Autohome news index page. The timeout keeps the script from
# hanging forever on a stalled connection, and raise_for_status() stops early
# on an HTTP error instead of parsing an error page.
response = requests.get("https://www.autohome.com.cn/news/", timeout=10)
response.raise_for_status()

# 1. Difference between .content and .text:
#    - response.content is the raw body as bytes
#    - response.text is the body decoded to str using response.encoding
# The encoding is set to GBK before .text is read so the decode uses it.
response.encoding = 'gbk'
soup = BeautifulSoup(response.text, 'html.parser')

# Narrow the search to the article container (looked up by its unique id)
# and collect every <li> inside it in one chained lookup.
# Tutorial note: a single tag can also be searched by CSS class with either
#   tag.find(name='h3', class_='...')   # "class" is a Python keyword, hence the underscore
#   tag.find(name='h3', attrs={'class': '...'})
container = soup.find(id='auto-channel-lazyload-article')
if container is None:
    raise RuntimeError(
        "element with id 'auto-channel-lazyload-article' not found in the page"
    )
li_list = container.find_all(name='li')

# Path separators, characters reserved in file names on common platforms, and
# control whitespace are replaced with "_" so a headline cannot break open().
unsafe_chars = str.maketrans({ch: "_" for ch in '\\/:*?"<>|\r\n\t'})

for li in li_list:
    title = li.find('h3')  # headline tag
    if title is None:  # items without a headline are skipped
        continue
    title = title.text

    # Summary and article link are extracted for reference; the rest of this
    # script only uses the title and the image. Missing tags no longer crash.
    summary_tag = li.find("p")
    summary = summary_tag.text if summary_tag is not None else ""
    link_tag = li.find("a")
    url = link_tag.get('href') if link_tag is not None else None

    img_tag = li.find("img")
    img = img_tag.get('src') if img_tag is not None else None
    if not img:  # nothing to download for this item
        continue
    # Resolve relative / protocol-relative ("//host/path") sources against the
    # page URL; requests rejects URLs that have no scheme.
    img = urljoin(response.url, img)
    print(img)

    # Download the image; a failed download skips this item instead of
    # aborting the whole run or saving an error body as a .jpg.
    try:
        res = requests.get(img, timeout=10)
        res.raise_for_status()
    except requests.RequestException as exc:
        print("failed to download %s: %s" % (img, exc))
        continue

    file_name = "%s.jpg" % (title.translate(unsafe_chars),)
    with open(file_name, 'wb') as f:
        f.write(res.content)  # write the raw bytes of the image
# For more great articles, follow Wang Mingchang's blog.