Python使用Beautiful Soup解析html,获取元素并提取内容值,包括:
1. 解析获取标题
2. 根据标签及id获取所有元素
3. 根据标签及class获取所有元素
4. 获取元素下的标签的值
5. 获取元素的parent及child元素的值
参考(链接见文末)
 
from bs4 import BeautifulSoup

# Demo script: parse a saved HTML page with Beautiful Soup and print the
# title, elements selected by tag + id / tag + class, their text, and their
# parent / child nodes.
# NOTE(review): the original statements were collapsed onto shared lines, so
# the loop nesting below is reconstructed from statement order -- confirm
# against the intended output.
file_html = 'test/demo.html'

# Read the raw bytes and decode them explicitly as UTF-8; the context manager
# closes the handle even if reading or decoding fails.
with open(file_html, "rb") as file:
    html = file.read().decode("utf-8")
bs = BeautifulSoup(html, "html.parser")

print("获取文章title")
print(bs.title)

# Every <input> whose id is 'mSearchInput' (collected only, never printed).
id_list = bs.find_all('input', id='mSearchInput')

# Every <div class="view-num-box">, printed with the text of its parent.
div_class_list = bs.find_all('div', class_='view-num-box')
for i, div in enumerate(div_class_list):
    print(i, div.text, ' parent: ', div.parent.text)
print('-----------------------------------------------------------')

blog_list = bs.find_all('article', class_='blog-list-box')
# Query the title boxes once instead of re-running find_all per article.
blog_top_list = bs.find_all('div', class_='blog-list-box-top')
for i, blog in enumerate(blog_list):
    # The title boxes are matched to articles by position only; fall back to
    # None when there are fewer title boxes than articles.
    top_text = blog_top_list[i].text if i < len(blog_top_list) else None
    print(i, blog.text, '\ntitle: ', top_text)
    # Assumes every article contains an <h4>, a <span> and a <div>; a missing
    # one yields None here and raises AttributeError.
    print(blog.h4.text)
    print(blog.span.text)
    print(blog.div, blog.div.next)
    # Direct children, first via the .contents list, then the .children iterator.
    for j, content in enumerate(blog.contents):
        print('contents: ', j, content.text)
    for j, child in enumerate(blog.children):
        print('child: ', j, child.text)

div_list = bs.find_all('div', class_='user-profile-head-address')
# Guard the empty result so a page without this div does not raise IndexError.
print('div_list: ', div_list[0].text if div_list else None)

meta_list = bs.find_all('meta')
for j, meta in enumerate(meta_list):
    # Not every <meta> carries a content attribute (e.g. <meta charset=...>),
    # so use .get() and print None instead of raising KeyError.
    print(j, meta.text, meta.attrs.get('content'))

print("2. NavigableString的例子:获取title的string内容和div的属性")
print(bs.title.string)
print(bs.div.attrs)
print("3. BeautifulSoup的例子:获取整个html文档的name")
print(bs.name)
print("4. Comment的例子:获取a的string")
print(bs.a.string)
参考:
https://blog.csdn.net/qq_42732153/article/details/81105725
https://blog.csdn.net/qq_50587771/article/details/123870433