爬取新浪财经现金流量表
股票代码:600357 / 600358 / 600359,保存到 D:\datasets。
In [1]:
import os
import csv
import requests
from bs4 import BeautifulSoup
In [2]:
# Request headers shared by every download; only a browser-like User-Agent is set.
headers = {
    'User-Agent': 'Mozilla/5.0',
}

# Folder that receives the generated CSV files. It is created up front
# (no error if it already exists) so later writes find the directory in place.
save_dir = r'D:\datasets'
os.makedirs(save_dir, exist_ok=True)
In [3]:
def download(stockid):
    """Fetch one stock's cash-flow statement from Sina Finance and save it as CSV.

    The statement page for ``stockid`` is requested, every non-empty row of
    the statement table is collected as a list of cell texts, and the rows are
    written to ``<company>(<stockid>) 现金流量表.csv`` inside ``save_dir``.
    The company name is the text of the first cell of the first row, up to
    (not including) the first ``'('``.

    Args:
        stockid: Stock code as it appears in the Sina URL, e.g. ``'600357'``.

    Raises:
        requests.HTTPError: If the server answers with an error status.
        ValueError: If the page contains no statement table, or the table
            has no non-empty rows.
    """
    url = f'https://money.finance.sina.com.cn/corp/go.php/vFD_CashFlow/stockid/{stockid}/ctrl/part/displaytype/4.phtml'
    r = requests.get(url, headers=headers, timeout=30)
    # Fail early on 4xx/5xx instead of trying to parse an error page.
    r.raise_for_status()
    # GBK is a superset of GB2312: it decodes the same pages identically but
    # does not turn rarer characters into replacement marks.
    r.encoding = 'gbk'
    soup = BeautifulSoup(r.text, 'html.parser')

    # NOTE(review): the id reads "ProfitStatement" although this is the
    # cash-flow page; the recorded notebook output shows it does match here.
    table = soup.find('table', id='ProfitStatementNewTable0')
    if table is None:
        raise ValueError(f'no statement table found for stock {stockid}: {url}')

    rows = []
    for tr in table.find_all('tr'):
        cells = [c.get_text(strip=True) for c in tr.find_all(['td', 'th'])]
        if cells:  # skip rows that contain no td/th cells
            rows.append(cells)
    if not rows:
        raise ValueError(f'statement table for stock {stockid} is empty: {url}')

    company = rows[0][0].split('(')[0]
    # Names such as "*ST..." contain characters that are not allowed in
    # Windows file names; replace them so open() does not fail.
    forbidden = str.maketrans({ch: '_' for ch in '\\/:*?"<>|'})
    company = company.translate(forbidden).strip()

    path = os.path.join(save_dir, f'{company}({stockid}) 现金流量表.csv')
    # utf-8-sig writes a BOM so that Excel detects the encoding of the
    # Chinese text; newline='' is what the csv module expects for writing.
    with open(path, 'w', newline='', encoding='utf-8-sig') as f:
        csv.writer(f).writerows(rows)
    print('保存:', path)
In [4]:
# Stock codes whose cash-flow statements are fetched, in download order.
_stock_codes = ('600357', '600358', '600359')
for code in _stock_codes:
    download(code)
保存: D:\datasets\承德钒钛(600357) 现金流量表.csv
保存: D:\datasets\国旅联合(600358) 现金流量表.csv
保存: D:\datasets\新农开发(600359) 现金流量表.csv