Python 爬取 HTML 代码
# -*- coding:UTF-8 -*-
import os
import os.path

import requests


def download(url, timeout=10):
    """Fetch *url* and save the response body to ``String.txt`` as UTF-8.

    Args:
        url: Absolute URL (including the scheme) to request.
        timeout: Seconds passed to ``requests.get`` as its timeout, so a
            server that never answers cannot hang the script forever.

    Returns:
        None. On a 404 response a message is printed and no file is written.
    """
    req = requests.get(url, timeout=timeout)
    # Decode req.text as UTF-8; per the accompanying notes, the saved HTML
    # comes out garbled without this line.
    req.encoding = 'utf-8'
    if req.status_code == 404:
        print("404错误")
        # Stop here: the original fell through, saved the error page and
        # still reported success.
        return
    # The file is opened in binary mode, so the decoded text (str) has to be
    # encoded back to bytes before writing.
    with open('String.txt', 'wb') as out_file:
        out_file.write(req.text.encode())
    print("下载完成")


if __name__ == '__main__':
    url = input("请输入URL: ").strip()
    # Only add a scheme when the user did not type one; blindly prepending
    # produced invalid URLs such as "http://http://example.com".
    if not url.startswith(('http://', 'https://')):
        url = 'http://' + url
    download(url)
其中的重点是:
req.encoding = "utf-8":如果没有这行代码,爬取下来的HTML可能会是乱码。 req.text.encode():后面的这个 encode() 是为了把 str 转为 bytes(因为文件是以 'wb' 二进制模式打开的,只能写入 bytes)。
评论 2
感谢分享!
感谢分享!