本文共 1025 字,大约阅读时间需要 3 分钟。
# Build a small HTML document from string fragments, parse it with
# BeautifulSoup, and print the parser's pretty-printed rendering of it.
import re  # used by the regex-based attribute search further down

from BeautifulSoup import BeautifulSoup

# The <body> and <p> elements are intentionally left unclosed in these
# fragments; the parser is given the markup exactly as written here.
doc = [
    '<html><head><title>Page title</title></head>',
    '<body><p id="firstpara" align="center">This is paragraph <b>one</b>.',
    '<p id="secondpara" align="blah">This is paragraph <b>two</b>.',
    '</html>',
]
soup = BeautifulSoup(''.join(doc))

# The parenthesised form is valid both as a Python 2 print statement and
# as a Python 3 print() call.
print(soup.prettify())
运行结果为:
# Print the name of the first top-level node of the parsed document, then
# the name of each of its direct children.
# NOTE(review): for the document parsed above the first node is expected
# to be the <html> tag -- confirm if the input markup changes.
root = soup.contents[0]
print(root.name)

# To inspect only the first child instead:
#     print(root.contents[0].name)

# Iterate the children directly rather than indexing via range(len(...)).
# NOTE(review): every child must expose a .name attribute, exactly as the
# original index-based loop required.
for child in root.contents:
    print(child.name)
# Navigation and search examples on the parsed document.
# The bare expressions below only display a value when typed into an
# interactive interpreter; the comment after each one is the expected
# result as recorded in the original tutorial.

# Walk down the tree by tag name using attribute access.
titleTag = soup.html.head.title
titleTag
# <title>Page title</title>

# The text held by the <title> tag.
titleTag.string
# u'Page title'

# Calling the soup object with a tag name returns the matching tags.
len(soup('p'))
# 2

# Search by tag name plus an attribute value: all matches ...
soup.findAll('p', align="center")
# [<p id="firstpara" align="center">This is paragraph <b>one</b>. </p>]

# ... or only the first match.
soup.find('p', align="center")
# <p id="firstpara" align="center">This is paragraph <b>one</b>. </p>

# Index a tag like a dictionary to read one of its attributes.
soup('p', align="center")[0]['id']
# u'firstpara'

# An attribute filter may be a compiled regular expression; this one
# matches align values that start with "b".
soup.find('p', align=re.compile('^b.*'))['id']
# u'secondpara'

# Text of the <b> tag inside the first <p> ...
soup.find('p').b.string
# u'one'

# ... and inside the second <p>.
soup('p')[1].b.string
# u'two'
转载地址:http://zktwo.baihongyu.com/