# Environment: Python 3.6
#!/usr/bin/env python
# -*- coding: utf-8 -*-
# AUTHOR: duwentao
"""Scrape one page of the kuaidaili free proxy list into ``ip_list.txt``.

The script prompts for a page number, downloads that listing page, extracts
the IP and port columns and appends them to the output file as ``ip:port``
lines (one per line).
"""
import re
from typing import Iterable, List, Tuple

BASE_URL = "https://www.kuaidaili.com/free/inha/"
OUTPUT_FILE = "ip_list.txt"
REQUEST_TIMEOUT = 10  # seconds; without a timeout requests may block forever

HEADERS = {
    "User-Agent": (
        "Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 "
        "(KHTML, like Gecko) Chrome/49.0.2623.221 Safari/537.36 "
        "SE 2.X MetaSr 1.0"
    )
}

# NOTE(review): in the reviewed source both patterns read r' (.*?)' -- the
# HTML tag text appears to have been stripped, which makes them identical and
# makes every match an empty string. The tags below are reconstructed on the
# assumption that the listing is a table with <td data-title="IP"> and
# <td data-title="PORT"> cells -- confirm against the live page markup.
IP_PATTERN = re.compile(r'<td data-title="IP">(.*?)</td>', re.S)
PORT_PATTERN = re.compile(r'<td data-title="PORT">(.*?)</td>', re.S)


def build_url(page: str) -> str:
    """Return the listing URL for the given page number (as a string)."""
    return BASE_URL + page + "/"


def fetch_page(url: str) -> str:
    """Download *url* and return its body decoded as UTF-8.

    Raises:
        requests.RequestException: on connection errors, timeouts or a
            non-2xx HTTP status.
    """
    # Imported here so the parsing helpers stay importable without requests.
    import requests

    # The headers must be passed by keyword: the second positional argument
    # of requests.get() is ``params`` (the query string), not ``headers``.
    response = requests.get(url, headers=HEADERS, timeout=REQUEST_TIMEOUT)
    response.raise_for_status()
    response.encoding = "utf-8"
    return response.text


def parse_proxies(html: str) -> List[Tuple[str, str]]:
    """Extract ``(ip, port)`` pairs from the listing page HTML.

    IPs and ports are matched separately and paired by position, so a
    repeated IP keeps its own port. If the two columns yield a different
    number of matches, the surplus entries are dropped.
    """
    ips = IP_PATTERN.findall(html)
    ports = PORT_PATTERN.findall(html)
    return [(ip.strip(), port.strip()) for ip, port in zip(ips, ports)]


def save_proxies(proxies: Iterable[Tuple[str, str]], path: str = OUTPUT_FILE) -> None:
    """Append each proxy to *path* as an ``ip:port`` line."""
    # ``with`` guarantees the file is closed even if a write fails.
    with open(path, "a", encoding="utf-8") as out:
        out.writelines("{}:{}\n".format(ip, port) for ip, port in proxies)


def main() -> None:
    """Prompt for a page number, scrape that page and save the proxies."""
    page = input("请输入你要爬取第几页:").strip()
    # The value is spliced into the URL path, so accept plain digits only.
    if not re.fullmatch(r"[0-9]+", page):
        raise SystemExit("页码必须是数字")

    url = build_url(page)
    print("获取代理IP地址")
    html = fetch_page(url)
    save_proxies(parse_proxies(html))
    print("保存完毕")


if __name__ == "__main__":
    main()