python爬取HTML代码

# -*- coding:UTF-8 -*-
import os
import os.path
import requests
def download(url, timeout=10):
    """Fetch *url* and save the response body to ``String.txt``.

    The response is decoded as UTF-8 and written back as UTF-8 bytes,
    overwriting any existing ``String.txt`` in the current directory.

    Args:
        url: Full URL (including scheme) of the page to fetch.
        timeout: Seconds to wait for the server before giving up. Without
            a timeout ``requests.get`` can block indefinitely.

    Returns:
        None. On a 404 response an error message is printed and no file
        is written.

    Raises:
        requests.exceptions.RequestException: On connection failure or
            timeout.
    """
    req = requests.get(url, timeout=timeout)
    # Force UTF-8 so that req.text is not decoded with the encoding that
    # requests guessed from the response headers.
    req.encoding = 'utf-8'
    if req.status_code == 404:
        print("404错误")
        # Do not save the error page or report success for a missing page.
        return
    # The file is opened in binary mode, so the decoded text has to be
    # encoded back to bytes before writing.
    with open('String.txt', 'wb') as output_file:
        output_file.write(req.text.encode('utf-8'))
    print("下载完成")
if __name__ == '__main__':
    # Script entry point: ask for a URL and download it.
    url = input("请输入URL: ").strip()
    # Only add the default scheme when the user did not type one; blindly
    # prepending it would turn "https://example.com" into an invalid URL.
    if not url.startswith(('http://', 'https://')):
        url = 'http://' + url
    download(url)

其中的重点是:

req.encoding = "utf-8"  如果没有这行代码,requests 会根据响应头猜测编码(响应头未声明 charset 时通常退回到 ISO-8859-1),爬取下来的中文HTML就可能是乱码

req.text.encode() 后面的这个 encode()是为了把str转为bytes,因为文件是以 'wb'(二进制写入)模式打开的,只能写入bytes