您可以尝试以下操作:
from bs4 import BeautifulSoup as soup
import urllib
import re
# Download the page, parse it, and collect the cleaned text of every <td>
# whose class matches TTRow_right or TTRow_left.
#
# urlopen lives in different modules on Python 2 and Python 3; resolve it
# locally so the snippet runs on either interpreter.
try:
    from urllib.request import urlopen  # Python 3
except ImportError:
    from urllib import urlopen  # Python 2

response = urlopen('https://www.bseindia.com/corporates/shpSecurities.aspx?scripcd=500209&qtrid=96.00')
try:
    # Pass the raw bytes straight to BeautifulSoup: wrapping them in str()
    # is a no-op on Python 2 but yields the "b'...'" repr on Python 3,
    # which would corrupt the markup being parsed.
    s = soup(response.read(), 'lxml')
finally:
    # Always release the HTTP connection, even if parsing fails.
    response.close()

cells = s.find_all('td', {'class': re.compile('TTRow_right|TTRow_left')})
# Strip line breaks and any run of two or more whitespace characters from
# each cell's text (raw string so that \s is not an invalid str escape).
cleaned = (re.sub(r'[\n\r]+|\s{2,}', '', cell.text) for cell in cells)
# Keep only non-empty strings. A list comprehension gives a real list on
# both Python 2 and 3, whereas filter() is lazy on Python 3.
results = [text for text in cleaned if text]
输出:
[u'(A) Promoter & Promoter Group', u'19', u'28,17,02,889', u'28,17,02,889', u'12.90', u'28,17,02,889', u'(B) Public', u'9,16,058', u'1,87,81,45,362', u'1,32,95,642', u'1,89,14,41,004', u'86.61', u'1,88,74,40,959', u'(C1) Shares underlying DRs', u'0.00', u'(C2) Shares held by Employee Trust', u'1', u'1,08,05,896', u'1,08,05,896', u'0.49', u'1,08,05,896', u'(C) Non Promoter-Non Public', u'1', u'1,08,05,896', u'1,08,05,896', u'0.49', u'1,08,05,896', u'Grand Total', u'9,16,078', u'2,17,06,54,147', u'1,32,95,642', u'2,18,39,49,789', u'100.00', u'2,17,99,49,744']