爬虫(持续更新ing)
requests模块
import requests
# Fetch the Baidu home page and show the body plus request/response metadata.
url = 'https://www.baidu.com'
res = requests.get(url)

# Body: raw response bytes decoded with bytes.decode()'s default codec (UTF-8).
print(res.content.decode())

# Final URL, then the headers that were sent and the headers that came back.
for label, value in (
    ('url', res.url),
    ('request headers', res.request.headers),
    ('res headers', res.headers),
):
    print(label, value)
import requests
# Download the Baidu logo and save the raw bytes as a local PNG file.
url = 'https://www.baidu.com/img/flexible/logo/pc/result.png'
# Without a timeout, requests waits indefinitely on a stalled connection.
res = requests.get(url, timeout=10)
# Fail loudly on 4xx/5xx so an error page is never written out as the image.
res.raise_for_status()
# Binary mode: the payload is image data, not text.
with open('./img/jwq.png', 'wb') as img:
    img.write(res.content)
import requests
# Request the Baidu home page while presenting a desktop Chrome user-agent.
url = 'https://www.baidu.com'
headers = {
    'user-agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/133.0.0.0 Safari/537.36',
}
res = requests.get(url, headers=headers)

# Decode once and reuse the text for both the dump and the length report.
html = res.content.decode()
print(html)
print(len(html))

# Show the headers that were actually sent, including the custom user-agent.
print(res.request.headers)
import requests
import random
# Request the Baidu home page with a user-agent picked at random from a fixed pool.
url = 'https://www.baidu.com'

# Pool of browser user-agent strings, one entry per line.
user_agents = [
    'Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/39.0.2171.95 Safari/537.36 OPR/26.0.1656.60',
    'Opera/8.0 (Windows NT 5.1; U; en)',
    'Mozilla/5.0 (Windows NT 5.1; U; en; rv:1.8.1) Gecko/20061208 Firefox/2.0.0 Opera 9.50',
    'Mozilla/4.0 (compatible; MSIE 6.0; Windows NT 5.1; en) Opera 9.50',
    'Mozilla/5.0 (Windows NT 6.1; WOW64; rv:34.0) Gecko/20100101 Firefox/34.0',
    'Mozilla/5.0 (X11; U; Linux x86_64; zh-CN; rv:1.9.2.10) Gecko/20100922 Ubuntu/10.10 (maverick) Firefox/3.6.10',
    'Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/534.57.2 (KHTML, like Gecko) Version/5.1.7 Safari/534.57.2 ',
    'Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/39.0.2171.71 Safari/537.36',
    'Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.11 (KHTML, like Gecko) Chrome/23.0.1271.64 Safari/537.11',
    'Mozilla/5.0 (Windows; U; Windows NT 6.1; en-US) AppleWebKit/534.16 (KHTML, like Gecko) Chrome/10.0.648.133 Safari/534.16',
    'Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/30.0.1599.101 Safari/537.36',
    'Mozilla/5.0 (Windows NT 6.1; WOW64; Trident/7.0; rv:11.0) like Gecko',
    'Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/536.11 (KHTML, like Gecko) Chrome/20.0.1132.11 TaoBrowser/2.0 Safari/536.11',
    'Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.1 (KHTML, like Gecko) Chrome/21.0.1180.71 Safari/537.1 LBBROWSER',
    'Mozilla/4.0 (compatible; MSIE 6.0; Windows NT 5.1; SV1; QQDownload 732; .NET4.0C; .NET4.0E)',
    'Mozilla/5.0 (Windows NT 5.1) AppleWebKit/535.11 (KHTML, like Gecko) Chrome/17.0.963.84 Safari/535.11 SE 2.X MetaSr 1.0',
    'Mozilla/4.0 (compatible; MSIE 7.0; Windows NT 5.1; Trident/4.0; SV1; QQDownload 732; .NET4.0C; .NET4.0E; SE 2.X MetaSr 1.0) ',
]

# One random draw per run; the chosen string becomes the request's user-agent.
ua = random.choice(user_agents)
headers = {'user-agent': ua}
res = requests.get(url, headers=headers)

# Report the decoded body length and the headers that were actually sent.
body_text = res.content.decode()
print(len(body_text))
print(res.request.headers)
from fake_useragent import UserAgent
# Build a fake_useragent provider and print the value of its `random` attribute.
ua = UserAgent()
random_ua = ua.random
print(random_ua)
import requests
from fake_useragent import UserAgent
from urllib. parse import quote, unquote
# Run a Baidu search for a keyword typed by the user and dump the result page.
ua = UserAgent()
url = 'https://www.baidu.com/s'

# The keyword is sent as the `wd` query parameter; requests handles URL-encoding.
name = input('请输入关键词:')
params = dict(wd=name)
headers = {'user-agent': ua.random}

res = requests.get(url, headers=headers, params=params)
page_html = res.content.decode()
print(page_html)
import requests
from fake_useragent import UserAgent
# Download an image from the NetEase music CDN and save it as a local JPEG.
ua = UserAgent()
url = 'https://p1.music.126.net/_JcHT6u-TYhxjDbO3IhVQA==/109951170537166630.jpg?imageView&quality=89'
headers = {'user-agent': ua.random}
# Without a timeout, requests waits indefinitely on a stalled connection.
res = requests.get(url, headers=headers, timeout=10)
# Fail loudly on 4xx/5xx so an error page is never written out as the image.
res.raise_for_status()
# Binary mode: the payload is image data, not text.
with open('img/网易云.jpg', 'wb') as f:
    f.write(res.content)
import requests
from fake_useragent import UserAgent
# Download an MP3 stream from QQ Music and save it to disk.
ua = UserAgent()
# NOTE(review): the URL carries a `vkey` token that presumably expires — confirm
# it is still valid before relying on this download.
url = 'https://ws6.stream.qqmusic.qq.com/RS02064dfdIM38rSZY.mp3?guid=7976864250&vkey=AE4590431EAD34766DBAA9BA1A3715B3B45721EE23180669EA694EB7CA1F0DB4C8DE867A9883D4E897ED4E6F2ECF600CDFD34C78F2C07E09__v215192d1e&uin=554242051&fromtag=120052'
headers = {'user-agent': ua.random}
# Without a timeout, requests waits indefinitely on a stalled connection.
res = requests.get(url, headers=headers, timeout=10)
# Fail loudly on 4xx/5xx so an error body is never written out as audio.
res.raise_for_status()
# Binary mode: the payload is audio data, not text.
with open('video/晴天.mp3', 'wb') as f:
    f.write(res.content)
import requests
from fake_useragent import UserAgent
# Download a music-video file from QQ Music and save it to disk.
ua = UserAgent()
# NOTE(review): the URL ends in `.ts` while the output file is named `.mp4` —
# confirm the container format matches what players expect.
url = 'https://mv6.music.tc.qq.com/44B177558A20632E722F75FB6A67025F0BFC15AB98CC0B58FD3FC79E00B2EEDC9FAC3DF26DD0A319EACA6B2A30D24E2CZZqqmusic_default__v21ea05e5a/qmmv_0b53feaagaaao4ae4d5t4vtvikiaamuqaa2a.f9944.ts'
headers = {'user-agent': ua.random}
# Without a timeout, requests waits indefinitely on a stalled connection.
res = requests.get(url, headers=headers, timeout=10)
# Fail loudly on 4xx/5xx so an error body is never written out as video.
res.raise_for_status()
# Binary mode: the payload is video data, not text.
with open('video/qq音乐.mp4', 'wb') as f:
    f.write(res.content)
import requests
from fake_useragent import UserAgent
# Save the first N listing pages of a Baidu Tieba forum as local HTML files.
ua = UserAgent()
url = 'https://tieba.baidu.com/f?'
name = input('请输入关键词:')
page = int(input('请输入要保存的页数:'))
for i in range(page):
    # Tieba addresses listing pages by a post offset in `pn`, stepping by 50,
    # so page index i starts at i * 50. A constant 0 would fetch page 1 every time.
    params = {'kw': name, 'ie': 'utf-8', 'pn': i * 50}
    # Pick a fresh user-agent for every request.
    headers = {'user-agent': ua.random}
    # Without a timeout, requests waits indefinitely on a stalled connection.
    res = requests.get(url, headers=headers, params=params, timeout=10)
    # Files are numbered from 1: html/<keyword>1.html, html/<keyword>2.html, ...
    with open(f'html/{name}{i + 1}.html', 'wb') as f:
        f.write(res.content)
import requests
from fake_useragent import UserAgent
class TieBa:
    """Download Baidu Tieba listing pages for a keyword and save them as HTML.

    One random user-agent is chosen when the instance is created and reused
    for every request the instance makes.
    """

    def __init__(self):
        self.url = 'https://tieba.baidu.com/f?'
        self.headers = {'user-agent': UserAgent().random}

    @staticmethod
    def _page_params(name, page):
        """Return the query parameters for the zero-based listing page *page*.

        Tieba addresses listing pages by a post offset in ``pn``, stepping by
        50, so the offset must follow the page index being fetched.
        """
        return {'kw': name, 'ie': 'utf-8', 'pn': page * 50}

    def send(self, params):
        """GET the forum URL with *params* and return the response body as text."""
        # Without a timeout, requests waits indefinitely on a stalled connection.
        res = requests.get(
            self.url, headers=self.headers, params=params, timeout=10
        )
        return res.text

    def save(self, page, con):
        """Write the text *con* to ``html/<page>.html`` using UTF-8."""
        with open(f'html/{page}.html', 'w', encoding='utf-8') as f:
            f.write(con)

    def run(self):
        """Prompt for a keyword and a page count, then fetch and save each page."""
        name = input('请输入关键词:')
        pages = int(input('请输入要保存的页数:'))
        for page in range(pages):
            # Use the loop index, not the total count, so each iteration
            # requests a different page.
            params = self._page_params(name, page)
            data = self.send(params)
            self.save(page, data)
# Entry point for the class-based crawler: create it, then start the
# interactive run, which prompts for a keyword and a page count.
te = TieBa()
te.run()
import requests
from fake_useragent import UserAgent
import json

# Translate user-supplied Chinese text to English via the iciba web endpoint.
url = 'https://ifanyi.iciba.com/index.php?c=trans'
headers = {'user-agent': UserAgent().random}
name = input('请输入翻译内容:')
# Form fields for the POST body: source language, target language, text.
post_data = {'from': 'zh', 'to': 'en', 'q': name}
# Without a timeout, requests waits indefinitely on a stalled connection.
res = requests.post(url, headers=headers, data=post_data, timeout=10)
# Force UTF-8 so res.text is decoded consistently before JSON parsing.
res.encoding = 'utf-8'
# Named `result` rather than `dict` so the builtin type is not shadowed.
result = json.loads(res.text)
# NOTE(review): assumes the JSON object has a top-level 'out' key — confirm
# against the current API response shape.
print(result['out'])
import requests
from fake_useragent import UserAgent
# Request the Baidu home page through an HTTP/HTTPS proxy.
url = 'https://www.baidu.com'
headers = {'user-agent': UserAgent().random}

# The same proxy endpoint handles both schemes.
# NOTE(review): this address looks like a placeholder — replace it with a
# reachable proxy before running.
proxy_address = '1.1.1.1:9527'
proxies = {'http': proxy_address, 'https': proxy_address}

res = requests.get(url, headers=headers, proxies=proxies)
page_html = res.content.decode()
print(page_html)