Python 多线程检测代理 IP 可用性
测试样本
# ip.txt
110.52.235.87:9999@http#[高匿]湖南省岳阳市 联通
111.43.70.58:51547@http#[未知]黑龙江省 移动(全省通用)
183.196.97.125:41397@http#[未知]河北省廊坊市 移动
110.52.235.210:9999@http#[高匿]湖南省岳阳市 联通
183.166.167.163:8080@http#[未知]安徽省黄山市 电信
111.177.171.242:9999@http#[未知]湖北省随州市 电信
123.127.93.188:44399@http#
....
....
python脚本
需要安装requests库
pip install requests
# coding=utf-8
#
import functools
import os
import re
import threading
from datetime import datetime
from time import sleep

import requests
from requests import RequestException
# Directory containing this script; the input and output files live beside it.
base_dir = os.path.dirname(__file__)

# Input file holding the candidate proxies to check.
scr_ip_txt = os.path.join(base_dir, 'ip.txt')

# Output file for the working proxies, prefixed with today's date
# (two-digit year, month, day).
ip_txt = os.path.join(base_dir, '%s-ip.txt' % datetime.now().strftime('%y-%m-%d'))

# Upper bound on the number of checker threads started at once.
max_test_threads = 100
class testthread(threading.thread):
def __init__(self, _ip_li):
self.li = _ip_li
super(testthread, self).__init__()
def run(self):
self.li = [_ for _ in self.li if self.test_ip_available(_)]
def test_ip_available(self, ip):
_proxy = {'https': ip}
try:
print u'正在检测ip: %s 有效性\n' % ip
r = requests.get('https://www.so.com/s?ie=utf-8&fr=none&src=360sou_newhome&q=123',
proxies=_proxy,
timeout=5)
assert u'_360搜索' in r.text
except (requestexception, assertionerror):
return false
print u'找到可用代理ip: %s\n' % ip
return true
def time_wrapper(func):
    """Decorate ``func`` so that each call prints its wall-clock duration.

    The wrapper forwards all positional and keyword arguments to ``func``
    and returns whatever ``func`` returns. The elapsed time is printed as
    whole minutes and seconds after ``func`` completes; nothing is printed
    if ``func`` raises.
    """
    @functools.wraps(func)
    def _wrapper(*args, **kwargs):
        start_time = datetime.now()
        result = func(*args, **kwargs)
        seconds = (datetime.now() - start_time).total_seconds()
        # Truncate to whole seconds before splitting into minutes/seconds.
        minutes, remainder = divmod(int(seconds), 60)
        print(u'本次执行共消耗: %d分%d秒\n' % (minutes, remainder))
        return result
    return _wrapper
@time_wrapper
def parse_ip():
    """Check every proxy listed in the input file and save the working ones.

    Reads ``scr_ip_txt``, extracts all ``a.b.c.d:port`` entries, splits them
    across at most ``max_test_threads`` worker threads, waits for the workers
    to finish, then writes the de-duplicated working proxies to ``ip_txt``,
    one per line in sorted order.
    """
    # Read as bytes and decode leniently: only the ASCII ``ip:port`` tokens
    # matter, so non-ASCII text (e.g. region labels) must not cause a decode
    # error regardless of the file's encoding. 'replace' keeps a placeholder
    # character so neighbouring tokens cannot be fused together.
    with open(scr_ip_txt, 'rb') as f:
        text = f.read().decode('ascii', 'replace')
    _ip_list = re.findall(r'\d+\.\d+\.\d+\.\d+:\d+', text)
    print(u'检索到: %d 个ip地址\n' % len(_ip_list))

    # Ceiling division gives the chunk size per thread; floor division keeps
    # it an integer on Python 3. At least 1 so ``range`` gets a valid step.
    avg = max(1, -(-len(_ip_list) // max_test_threads))

    # Start one worker per non-empty chunk (never more than max_test_threads).
    threads = []
    for start in range(0, len(_ip_list), avg):
        _thread = testthread(_ip_list[start:start + avg])
        threads.append(_thread)
        _thread.start()

    # Wait for exactly the workers started here, without polling.
    for _thread in threads:
        _thread.join()

    # Collect the surviving proxies from every worker and drop duplicates.
    available = set()
    for th in threads:
        available.update(th.li)
    print(u'\n总共找到 %d 个可用ip\n' % len(available))

    # Sort so the output file is deterministic between runs.
    with open(ip_txt, 'w') as f:
        f.write('\n'.join(sorted(available)))
# Run the proxy check only when this file is executed as a script,
# not when it is imported as a module.
if __name__ == "__main__":
    parse_ip()
结果
检索到: 1899 个ip地址
......
......
......
正在检测ip: 124.81.245.148:8080 有效性
正在检测ip: 111.177.160.17:9999 有效性
总共找到 60 个可用ip
本次执行共消耗: 2分13秒
总结
以上为个人经验,希望能给大家一个参考,也希望大家多多支持代码网。
发表评论