IO Multiplexing
Purpose: detect whether any of several sockets has changed state (whether a connection has completed / whether data has arrived).
setblocking(False)
Turns calls that would normally block into non-blocking (non-waiting) calls.
import socket
import select

sk = socket.socket()
sk.setblocking(False)
try:
    sk.connect(("www.baidu.com", 80))
except BlockingIOError as e:
    pass

sk1 = socket.socket()
sk1.setblocking(False)
try:
    sk1.connect(("www.douban.com", 80))
except BlockingIOError as e:
    pass

sk2 = socket.socket()
sk2.setblocking(False)
try:
    sk2.connect(("www.zhihu.com", 80))
except BlockingIOError as e:
    pass
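The snippet above only issues the non-blocking connects; to detect which of them has completed, the sockets can be handed to select.select. A minimal sketch continuing from sk, sk1 and sk2 above (not part of the original notes):

# Wait until select reports the sockets as writable, i.e. the non-blocking
# connect() calls above have finished.
pending = [sk, sk1, sk2]
while pending:
    # block for at most 0.005s; wlist holds the sockets whose connection succeeded
    rlist, wlist, elist = select.select([], pending, [], 0.005)
    for s in wlist:
        print('connected:', s.getpeername())
        pending.remove(s)
        s.close()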
Concurrent requests based on IO multiplexing + sockets
Single-threaded concurrency
import socket
import select

client1 = socket.socket()
client1.setblocking(False)  # connect to Baidu: non-blocking
try:
    client1.connect(('www.baidu.com', 80))
except BlockingIOError as e:
    pass

client2 = socket.socket()
client2.setblocking(False)  # connect to Sogou: non-blocking
try:
    client2.connect(('www.sogou.com', 80))
except BlockingIOError as e:
    pass

client3 = socket.socket()
client3.setblocking(False)  # connect to oldboyedu: non-blocking
try:
    client3.connect(('www.oldboyedu.com', 80))
except BlockingIOError as e:
    pass

socket_list = [client1, client2, client3]
conn_list = [client1, client2, client3]

while True:
    rlist, wlist, elist = select.select(socket_list, conn_list, [], 0.005)
    # wlist holds the socket objects whose connection has completed
    for sk in wlist:
        if sk == client1:
            sk.sendall(b'GET /s?wd=alex HTTP/1.0\r\nhost:www.baidu.com\r\n\r\n')
        elif sk == client2:
            sk.sendall(b'GET /web?query=fdf HTTP/1.0\r\nhost:www.sogou.com\r\n\r\n')
        else:
            sk.sendall(b'GET /s?wd=alex HTTP/1.0\r\nhost:www.oldboyedu.com\r\n\r\n')
        conn_list.remove(sk)
    # rlist holds the sockets that have response data to read
    for sk in rlist:
        chunk_list = []
        while True:
            try:
                chunk = sk.recv(8096)
                if not chunk:
                    break
                chunk_list.append(chunk)
            except BlockingIOError as e:
                break
        body = b''.join(chunk_list)
        # print(body.decode('utf-8'))
        print('------------>', body)
        sk.close()
        socket_list.remove(sk)
    if not socket_list:
        break
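The same polling loop can also be written against the standard-library selectors module, which wraps select/epoll behind one interface. A minimal single-URL sketch of the same idea (my own addition, not part of the original example):

import socket
import selectors

sel = selectors.DefaultSelector()

client = socket.socket()
client.setblocking(False)
try:
    client.connect(('www.baidu.com', 80))
except BlockingIOError:
    pass
# first watch for the socket becoming writable (connect finished)
sel.register(client, selectors.EVENT_WRITE)

while True:
    for key, events in sel.select(timeout=0.005):
        s = key.fileobj
        if events & selectors.EVENT_WRITE:
            s.sendall(b'GET / HTTP/1.0\r\nhost:www.baidu.com\r\n\r\n')
            sel.modify(s, selectors.EVENT_READ)  # request sent; now only wait for the reply
        elif events & selectors.EVENT_READ:
            chunk = s.recv(8096)
            if chunk:
                print(chunk)
            else:
                sel.unregister(s)  # peer closed the connection; we are done
                s.close()
    if not sel.get_map():
        break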
Single-threaded concurrency, advanced
An asynchronous, non-blocking framework built on an event loop: lzl
Python has an open-source asynchronous non-blocking framework built on an event loop: Twisted (a minimal Twisted sketch follows the lzl usage example below)
Non-blocking: the call no longer waits, but it raises BlockingIOError, which only needs to be caught
Asynchronous: when a task finishes, the function I handed it is called automatically, i.e. a callback
import socket
import select

class Req(object):
    def __init__(self, sk, func):
        self.sock = sk
        self.func = func

    def fileno(self):
        # select needs a fileno(); delegate to the wrapped socket
        return self.sock.fileno()

class Nb(object):
    def __init__(self):
        self.conn_list = []
        self.socket_list = []

    def add(self, url, func):
        client = socket.socket()
        client.setblocking(False)  # non-blocking
        try:
            client.connect((url, 80))
        except BlockingIOError as e:
            pass
        obj = Req(client, func)
        self.conn_list.append(obj)
        self.socket_list.append(obj)

    def run(self):
        while True:
            rlist, wlist, elist = select.select(self.socket_list, self.conn_list, [], 0.005)
            # wlist holds the Req objects whose connection has completed
            for sk in wlist:
                # a Req object whose state changed; send the request (hard-coded here)
                sk.sock.sendall(b'GET /s?wd=alex HTTP/1.0\r\nhost:www.baidu.com\r\n\r\n')
                self.conn_list.remove(sk)
            for sk in rlist:
                chunk_list = []
                while True:
                    try:
                        chunk = sk.sock.recv(8096)
                        if not chunk:
                            break
                        chunk_list.append(chunk)
                    except BlockingIOError as e:
                        break
                body = b''.join(chunk_list)
                # print(body.decode('utf-8'))
                sk.func(body)
                sk.sock.close()
                self.socket_list.remove(sk)
            if not self.socket_list:
                break

def baidu_repsonse(body):
    print('Baidu download result:', body)

def sogou_repsonse(body):
    print('Sogou download result:', body)

def oldboyedu_repsonse(body):
    print('oldboyedu download result:', body)

t1 = Nb()
t1.add('www.baidu.com', baidu_repsonse)
t1.add('www.sogou.com', sogou_repsonse)
t1.add('www.oldboyedu.com', oldboyedu_repsonse)
t1.run()
How it is used
from lzl import Nb

def baidu_repsonse(body):
    print('Baidu download result:', body)

def sogou_repsonse(body):
    print('Sogou download result:', body)

def oldboyedu_repsonse(body):
    print('oldboyedu download result:', body)

t1 = Nb()
t1.add('www.baidu.com', baidu_repsonse)
t1.add('www.sogou.com', sogou_repsonse)
t1.add('www.oldboyedu.com', oldboyedu_repsonse)
t1.run()
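Twisted, mentioned above, follows the same event-loop-plus-callback pattern: register requests together with their callbacks, then run the reactor. A minimal sketch, assuming an older Twisted release in which the getPage helper (long deprecated and later removed) is still importable; the URLs and callback names are illustrative only:

from twisted.internet import defer, reactor
from twisted.web.client import getPage  # deprecated/removed in recent Twisted versions

def on_response(body):
    # called automatically once a page has been downloaded
    print(len(body))

def all_done(result):
    reactor.stop()

deferred_list = []
for url in ['http://www.baidu.com', 'http://www.sogou.com']:
    d = getPage(bytes(url, encoding='utf-8'))
    d.addCallback(on_response)
    deferred_list.append(d)

# stop the event loop once every download has finished
defer.DeferredList(deferred_list).addBoth(all_done)
reactor.run()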
Coroutines
A coroutine does not exist at the OS level; it is a construct invented by developers.
Micro-threads: a thread/process is sliced up so that execution switches back and forth between blocks of code instead of running straight through line by line.
A coroutine on its own has little practical use.
import greenlet

def f1():
    print(11)
    gr2.switch()
    print(22)
    gr2.switch()

def f2():
    print(33)
    gr1.switch()
    print(44)

# coroutine gr1
gr1 = greenlet.greenlet(f1)
# coroutine gr2
gr2 = greenlet.greenlet(f2)
# start gr1 (without this switch nothing runs)
gr1.switch()

# Output:
11
33
22
44

# Written this way, plain coroutines are no more convenient than ordinary
# function calls, so a coroutine by itself is useless:
def f1():
    print(11)
    print(33)

def f2():
    print(22)
    print(44)

f1()
f2()

# The output is the same:
11
33
22
44
Coroutines + switching on IO: this is where they become truly powerful
from gevent import monkey
monkey.patch_all()  # patch blocking stdlib calls (e.g. socket) so gevent can switch on IO

import requests
import gevent

def get_path(url):
    ret = requests.get(url)
    print(url, ret.content)

def get_path2(url):
    ret2 = requests.get(url)
    print("-----------", url, ret2.content)

def get_path3(url):
    ret3 = requests.get(url)
    print(url, ret3.content)

ret = gevent.joinall([
    gevent.spawn(get_path, 'https://www.zhihu.com'),
    gevent.spawn(get_path2, "https://www.douban.com"),
    gevent.spawn(get_path3, "https://www.baidu.com/s?ie=utf-8&f=8&rsv_bp=0&rsv_idx=1&tn=baidu&wd=zkx"),
])
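The switching behaviour can also be observed without any HTTP involved; a minimal sketch (the task names are made up) in which gevent.sleep stands in for a blocking IO call:

import gevent

def task(name):
    for i in range(3):
        print(name, i)
        gevent.sleep(0.1)  # pretend IO: yield to the hub so another greenlet can run

gevent.joinall([
    gevent.spawn(task, 'a'),
    gevent.spawn(task, 'b'),
])
# Output interleaves: a 0, b 0, a 1, b 1, a 2, b 2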