免费代理对于咱们羊毛党来说是个好东西,但是免费的代理有很多都不能用,况且还得一个一个试,累都累死了,于是写了这么一个小案例,用于减轻劳动。
爬虫代码
本案例爬取快代理的免费匿名代理,自动检测每个代理是否可用,并把可用的代理保存为 JSON 文件
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59
| from lxml import etree import requests import json
""" 打造自己的ip代理池 """
def get_ip_pool(page=5, timeout=10):
    """Scrape candidate proxies from the kuaidaili free proxy listing.

    Fetches listing pages 1..page from https://free.kuaidaili.com/free/inha/
    and collects the first two table cells of every row as "ip:port".
    The proxies are NOT checked for liveness here.

    Args:
        page: Number of listing pages to fetch, starting from page 1.
            Values <= 0 fetch nothing.
        timeout: Per-request timeout in seconds, so a stalled server cannot
            hang the scrape indefinitely.

    Returns:
        A set of "ip:port" strings (duplicates across pages are merged).
    """
    row_xpath = (
        "/html/body/div[@class='body']/div[@id='content']/div[@class='con-body']/div[2]/div[@id='list']/table["
        "@class='table table-bordered table-striped']/tbody/tr"
    )
    ip_pool = set()
    for page_no in range(1, page + 1):
        url = 'https://free.kuaidaili.com/free/inha/' + str(page_no)
        try:
            r = requests.get(url, timeout=timeout)
        except requests.RequestException as exc:
            # One bad page should not throw away the proxies already found.
            print("第{}页获取失败: {}".format(page_no, exc))
            continue
        tree = etree.HTML(r.text)
        if tree is None:
            # Defensive: nothing parseable came back for this page.
            continue
        for row in tree.xpath(row_xpath):
            ip_cells = row.xpath("./td[1]/text()")
            port_cells = row.xpath("./td[2]/text()")
            if not ip_cells or not port_cells:
                # Skip rows without both cells instead of raising IndexError.
                continue
            ip = ip_cells[0].strip()
            port = port_cells[0].strip()
            print("{}:{}".format(ip, port))
            ip_pool.add(ip + ":" + port)
    return ip_pool
def get_active_ip(pool, test_url="http://4.ipw.cn/", timeout=1):
    """Return the proxies from *pool* that can complete a test HTTP request.

    Each candidate is used as the HTTP proxy for a GET request to
    *test_url*. A candidate is kept only when the request finishes within
    *timeout* seconds and the response status is 200.

    Args:
        pool: Iterable of "ip:port" proxy strings.
        test_url: URL fetched through each proxy to probe it.
        timeout: Per-request timeout in seconds.

    Returns:
        A list of the proxies that passed, in iteration order of *pool*.
    """
    ips = []
    for item in pool:
        try:
            r = requests.get(test_url, proxies={"http": item}, timeout=timeout)
        except requests.RequestException:
            # Connection refused, timeout, bad proxy URL, etc.
            print("{}不可用".format(item))
            continue
        if r.status_code == 200:
            print("{}可用---------------------".format(item))
            ips.append(item)
        else:
            # Report non-200 answers too, so no candidate is dropped silently.
            print("{}不可用".format(item))
    return ips
# Scrape the candidate proxies, keep only those that pass the liveness check,
# then persist the survivors to ip.json as a JSON array.
pool = get_ip_pool()
ip_pool = get_active_ip(pool)

with open("ip.json", "w", encoding="utf-8") as out_file:
    json.dump(ip_pool, out_file)
[ "112.6.117.178:8085", "223.96.90.216:8085", "106.55.15.244:8889", "58.20.184.187:9091", "120.194.55.139:6969", "122.9.101.6:8888" ]
|
使用代理
从中随机选择一个代理使用,具体发送 HTTP 请求的代码请自行实现
1 2 3 4 5 6 7 8
import json
import random

# Load the proxies saved by the crawler and print one picked at random.
# The with-block closes the file even if parsing or the choice raises.
with open("ip.json", "r", encoding="utf-8") as f:
    ip_pool = json.load(f)

print(random.choice(ip_pool))
|
__END__