
帮朋友抓了一些代理IP,并根据测试的连通性,放在了不同的文件中。特将源码分享。
注意:
1,环境Python3.5
2,安装 BeautifulSoup4 和 requests(pip install beautifulsoup4 requests)
代码如下:
"""Scrape proxy addresses from ip84.com and keep the ones that answer a ping.

The script works in two phases:

1. ``func`` downloads one listing page, extracts every proxy row from the
   ``<table class="list">`` elements and appends ``ip:port`` to ``ip.txt``.
2. ``pin`` pings every address stored in ``ip.txt`` and appends the entries
   that respond to ``SuccessIp.txt``.

Requires Python 3.5+ with ``requests`` and ``beautifulsoup4`` installed.
The source is expected to be saved as UTF-8 (the Python 3 default), so no
explicit encoding declaration is needed.
"""
import os
import subprocess
import sys
import time

import requests
from bs4 import BeautifulSoup

# Base URLs of the listing pages; the page number is appended to each one.
all_url_add = {
    'url2': 'http://ip84.com/gn/',
}

IP_FILE = 'ip.txt'                # every scraped "ip:port" entry
SUCCESS_FILE = 'SuccessIp.txt'    # entries whose host answered the ping
REQUEST_TIMEOUT = 10              # seconds; keeps a dead server from hanging the run
COLUMNS_PER_ROW = 7               # ip, port, city, type, protocol, speed, time


def _parse_row(tr):
    """Return the seven stripped cell texts of a proxy row, or None.

    Rows that do not carry at least seven ``<td>`` cells (for example a
    header row built from ``<th>``) are not proxy entries and yield None.
    """
    cells = [str(td.get_text()).strip() for td in tr.find_all("td")]
    if len(cells) < COLUMNS_PER_ROW:
        return None
    # The city cell may span several lines in the markup; flatten it.
    cells[2] = cells[2].replace("\n", "")
    return tuple(cells[:COLUMNS_PER_ROW])


def func(url):
    """Scrape one listing page and append its proxies to ``ip.txt``.

    Args:
        url: Full URL of the listing page to download.

    Returns:
        ``'success'`` when the page was downloaded and processed,
        ``'failed'`` when the HTTP request itself could not be completed.
    """
    try:
        r = requests.get(url, timeout=REQUEST_TIMEOUT)
    except requests.RequestException as e:
        # One unreachable page must not abort the remaining pages.
        print(u"-------------------程序异常-----------------------")
        print(e)
        return 'failed'

    soup = BeautifulSoup(r.text, "html.parser")
    rows = []
    for table in soup.find_all("table", class_="list"):
        for tr in table.find_all("tr"):
            row = _parse_row(tr)
            if row is not None:
                rows.append(row)

    # Open the output once per page instead of once per row.
    with open(IP_FILE, 'a') as out:
        for row in rows:
            out.write(row[0] + ":" + row[1] + '\n')
            print('地址:{}端口:{}地区:{}类型:{}协议{}速度{}时间:{}'.format(*row))

    print(u'本页抓取结束,正在跳转下一页')
    return 'success'


def _is_reachable(ip):
    """Return True when ``ip`` answers a ping (exit status 0).

    The command is passed as an argument list, so the scraped text is never
    interpreted by a shell. Ping output is left on the console.
    """
    if sys.platform.startswith('win'):
        # NOTE(review): on Windows ``-w`` is a per-reply timeout in
        # milliseconds, so ``-w 5`` is very strict - confirm this is intended.
        command = ['ping', '-n', '5', '-w', '5', ip]
    else:
        command = ['ping', '-c', '5', ip]
    return subprocess.run(command).returncode == 0


def pin():
    """Ping every proxy in ``ip.txt`` and save the reachable ones.

    Each reachable ``ip:port`` entry is appended to ``SuccessIp.txt`` exactly
    as it was read, one entry per line.
    """
    try:
        with open(IP_FILE, 'r') as src:
            entries = [line.strip() for line in src if line.strip()]
    except FileNotFoundError:
        # Nothing was scraped, so there is nothing to test.
        entries = []

    with open(SUCCESS_FILE, 'a') as dst:
        for entry in entries:
            ip = entry.split(':')[0]
            if _is_reachable(ip):
                print('测试成功,正在写入新文件')
                # Write the entry that was actually tested, and flush so the
                # result survives an interrupted (long-running) check.
                dst.write(entry + '\n')
                dst.flush()
            else:
                print('测试失败')

    print('程序结束,可用IP已放在SuccessIp中')


def main():
    """Scrape pages 1-49 of every configured site, then test the results."""
    for name, base_url in all_url_add.items():
        print(name)
        for page in range(1, 50):
            url = base_url + str(page)
            print(url)
            status = func(url)
            if status == 'success':
                print(page, '页结束')
    print(u'****程序抓取运行结束,正在检查所得IP连通性,请勿关闭窗口*****')
    pin()


if __name__ == '__main__':
    main()
有点乱,有时间将数据存储在数据库,再将这个功能集成在博客当中。
Rex博客保留所有权利