The script exits without ever executing the function passed to apply_async
from bs4 import BeautifulSoup
import random
import requests
import pymongo
import time
from multiprocessing import Pool
user_agents = [
    'Mozilla/5.0 (iPhone; CPU iPhone OS 9_1 like Mac OS X) AppleWebKit/601.1.46 (KHTML, like Gecko) Version/9.0 '
    'Mobile/13B143 Safari/601.1',
    'Mozilla/5.0 (Linux; Android 5.0; SM-G900P Build/LRX21T) AppleWebKit/537.36 (KHTML, like Gecko) '
    'Chrome/48.0.2564.23 Mobile Safari/537.36',
    'Mozilla/5.0 (Linux; Android 5.1.1; Nexus 6 Build/LYZ28E) AppleWebKit/537.36 (KHTML, like Gecko) '
    'Chrome/48.0.2564.23 Mobile Safari/537.36']

# Note the header name must be 'User-Agent'; an underscore would be sent
# verbatim and the real User-Agent would stay at the requests default.
heads = {
    'User-Agent': random.choice(user_agents)
}
# Headers presumably used elsewhere to scrape the xicidaili proxy list;
# they are not referenced in this snippet.
ipHeads = {
    'Upgrade-Insecure-Requests': '1',
    'User-Agent': random.choice(user_agents),
    'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,*/*;q=0.8',
    'Referer': 'http://www.xicidaili.com/nn/',
    'Accept-Encoding': 'gzip, deflate, sdch',
    'Accept-Language': 'zh-CN,zh;q=0.8',
}
class douban():
    def __init__(self):
        self.client = pymongo.MongoClient('localhost', 27017)
        self.db = self.client['books']
        self.tool = self.client['tool']
        self.collectIp = self.tool['ip']

    # Fetch one stored proxy from MongoDB and shape it into the dict
    # format requests expects, e.g. {'http': 'http://1.2.3.4:8080'}.
    def getFromSQL(self):
        item = self.collectIp.find_one({'http': 'http'})
        proxies = {item['http']: 'http://' + item['ip'] + ':' + item['port']}
        return proxies
    def getAllTag(self):
        url = 'https://book.douban.com/tag/?view=type&icn=index-sorttags-all'
        proxies = self.getFromSQL()
        s = requests.get(url, headers=heads, proxies=proxies)
        if s.status_code == 403:
            # Banned proxy: drop it from the proxy pool and retry with a fresh one.
            values = list(proxies.values())[0]
            ip = values.split('//')[1].split(':')[0]
            self.collectIp.remove({'ip': ip})
            proxies = self.getFromSQL()
            s = requests.get(url, headers=heads, proxies=proxies)
        soup = BeautifulSoup(s.text, 'lxml')
        titleTags = soup.find_all('a', class_='tag-title-wrapper')
        tagList = soup.find_all('table', class_='tagCol')
        href = {}
        titleList = []
        # Each heading <a> pairs with the tagCol table that follows it.
        for titleTag, table in zip(titleTags, tagList):
            title = titleTag['name']
            titleList.append(title)
            href[title] = [tr.td.a['href'] for tr in table.find_all('tr')]
        return titleList, href
    def getAllBookUrl(self, title, hrefDic):
        collect = self.db[title]
        for href in hrefDic[title]:
            index = 0
            while True:
                url = 'https://book.douban.com' + href + '?start=' + str(index) + '&type=T'
                proxies = self.getFromSQL()
                s = requests.get(url, headers=heads, proxies=proxies)
                if s.status_code == 403:
                    # Banned proxy: remove it from the proxy pool (not from
                    # the book collection) and retry with a fresh one.
                    values = list(proxies.values())[0]
                    ip = values.split('//')[1].split(':')[0]
                    self.collectIp.remove({'ip': ip})
                    proxies = self.getFromSQL()
                    s = requests.get(url, headers=heads, proxies=proxies)
                soup = BeautifulSoup(s.text, 'lxml')
                liList = soup.find_all('li', class_='subject-item')
                if len(liList):
                    for li in liList:
                        # The book id sits between the fixed URL prefix
                        # and the trailing slash.
                        id = li.find('a')['href'][32:-1]
                        collect.insert({'bookId': id})
                    # Douban lists 20 books per page.
                    index += 20
                    time.sleep(3)
                else:
                    break
if __name__ == '__main__':
    p = Pool(4)
    a = douban()
    titleList, hrefDic = a.getAllTag()
    results = []
    for title in titleList:
        print('Start crawling %s' % title)
        # apply_async never raises in the parent: it returns an AsyncResult,
        # and any exception is stored there until .get() is called.
        results.append(p.apply_async(a.getAllBookUrl, args=(title, hrefDic)))
    p.close()
    p.join()
    for r in results:
        r.get()  # surfaces any exception that made a task fail silently
# Single-process version, kept for comparison:
# a = douban()
# titleList, hrefDic = a.getAllTag()
# a.getAllBookUrl(titleList[0], hrefDic)
# print('done')
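
A minimal sketch of what seems to be going on (the class below is made up for illustration, it is not from the script above): the Pool has to pickle the task function and its arguments to ship them to the worker processes. A bound method like a.getAllBookUrl drags the whole douban instance along with it, including its MongoClient, and MongoClient objects cannot be pickled (they hold live sockets and locks). apply_async does not raise in the parent; it stashes the failure on the AsyncResult, so without .get() the script simply exits as if the function had never been scheduled.

from multiprocessing import Pool

class Unpicklable:
    """Stands in for an object holding a MongoClient."""
    def __getstate__(self):
        # Simulate pymongo's behaviour: pickling the object fails.
        raise TypeError('cannot pickle this object')

    def work(self, tag):
        print('crawling', tag)

if __name__ == '__main__':
    obj = Unpicklable()
    p = Pool(2)
    res = p.apply_async(obj.work, args=('novel',))
    p.close()
    p.join()
    # Without this line the script ends quietly and work() never runs;
    # with it, the hidden TypeError is re-raised in the parent.
    res.get()

The usual fix is to pass only picklable data (e.g. the tag name and its href list) to the worker and create the MongoClient inside getAllBookUrl, or in a Pool initializer, so no connection object has to cross the process boundary.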