sinastock / scripts /fetch_stocks.py
Spooker's picture
Upload 16 files
7c3e988 verified
#!/usr/bin/env python3
"""
fetch_stocks.py — 从新浪财经 API 获取全部沪深 A 股股票代码,写入 stocks_all.txt
用法:
python3 scripts/fetch_stocks.py # 输出到 stocks_all.txt
python3 scripts/fetch_stocks.py -o my_list.txt # 指定输出文件
python3 scripts/fetch_stocks.py --sample 100 # 同时生成 100 只样本
"""
import urllib.request
import json
import time
import sys
import argparse
from pathlib import Path
def fetch_node(node: str, total: int, delay: float = 0.05) -> list[str]:
    """Fetch the stock symbols of one market node, page by page.

    Args:
        node: Value sent as the API's ``node`` query parameter (``main`` in
            this file passes ``"sh_a"`` and ``"sz_a"``).
        total: Expected number of stocks in the node; it is only used to
            work out how many pages to request. ``total <= 0`` requests
            nothing and returns an empty list.
        delay: Seconds to sleep after every page request.

    Returns:
        The ``symbol`` field of every entry on every page that could be
        fetched, in the order received. A page that fails to download or
        parse is reported on stderr and skipped, so the result may hold
        fewer than ``total`` symbols.
    """
    symbols: list[str] = []
    per_page = 100
    total_pages = (total + per_page - 1) // per_page  # ceiling division
    print(f" [{node}] {total_pages} 页 × {per_page} 条/页 = 约 {total} 只", flush=True)
    for page in range(1, total_pages + 1):
        url = (
            "http://vip.stock.finance.sina.com.cn/quotes_service/api/json_v2.php"
            f"/Market_Center.getHQNodeData?node={node}&page={page}"
            f"&num={per_page}&sort=symbol&asc=1&dpc=1"
        )
        try:
            with urllib.request.urlopen(url, timeout=15) as r:
                data = json.loads(r.read())
            # A JSON ``null`` body decodes to None; treat it as an empty page
            # instead of letting the iteration raise TypeError.
            # NOTE(review): presumably the endpoint sends ``null`` for a page
            # with no rows — confirm against the live API.
            batch = [d["symbol"] for d in data or []]
        except Exception as e:
            # Deliberate best effort: one bad page must not abort the run.
            print(f" 第 {page} 页失败: {e},跳过", file=sys.stderr)
        else:
            symbols.extend(batch)
            print(f" 第 {page:3d}/{total_pages} 页: {len(batch)} 只", flush=True)
        # Pause between requests whether or not the page succeeded.
        time.sleep(delay)
    return symbols
def get_node_count(node: str) -> int:
    """Return the stock count the API reports for *node*.

    The response body is read as bytes, stripped of surrounding whitespace
    and of leading/trailing double quotes, then converted with ``int``.
    Network errors and non-numeric bodies propagate to the caller.
    """
    endpoint = (
        "http://vip.stock.finance.sina.com.cn/quotes_service/api/json_v2.php"
        "/Market_Center.getHQNodeStockCount"
    )
    with urllib.request.urlopen(f"{endpoint}?node={node}", timeout=10) as response:
        raw = response.read()
        digits = raw.strip().strip(b'"')
        return int(digits)
def main():
    """Fetch all Shanghai/Shenzhen A-share symbols and write them to disk.

    Command-line options:
        -o / --output: path of the list file (default ``stocks_all.txt``).
        --sample N: when N > 0, also write the first N symbols of the sorted
            list to ``stocks_N.txt`` in the same directory as the output file.

    The output file starts with two ``#`` header lines (reported node counts,
    number of symbols written and the local timestamp) followed by one symbol
    per line, sorted and de-duplicated.
    """
    parser = argparse.ArgumentParser(description="获取沪深 A 股列表")
    parser.add_argument("-o", "--output", default="stocks_all.txt", help="输出文件路径")
    parser.add_argument("--sample", type=int, default=0, help="额外生成前 N 只的样本文件")
    args = parser.parse_args()

    print("正在查询各板块股票数量...")
    sh_count = get_node_count("sh_a")
    sz_count = get_node_count("sz_a")
    print(f" 上证 A 股: {sh_count} 只")
    print(f" 深证 A 股: {sz_count} 只")

    print("\n开始拉取上证 A 股...")
    sh_symbols = fetch_node("sh_a", sh_count)
    print("\n开始拉取深证 A 股...")
    sz_symbols = fetch_node("sz_a", sz_count)

    all_stocks = sorted(set(sh_symbols + sz_symbols))
    total = len(all_stocks)
    print(f"\n合并去重后共 {total} 只股票")
    # fetch_node skips pages that fail, so make an incomplete list visible
    # instead of silently writing it.
    expected = sh_count + sz_count
    if total < expected:
        print(
            f"警告: 仅获取到 {total} 只,少于接口报告的 {expected} 只,列表可能不完整",
            file=sys.stderr,
        )

    out = Path(args.output)
    # Create the target directory so a long fetch is not lost to a missing path.
    out.parent.mkdir(parents=True, exist_ok=True)
    with open(out, "w", encoding="utf-8") as f:
        f.write(f"# 沪深 A 股全量列表(上证 {sh_count} + 深证 {sz_count})\n")
        f.write(f"# 共 {total} 只,获取时间: {time.strftime('%Y-%m-%d %H:%M:%S')}\n")
        f.writelines(f"{s}\n" for s in all_stocks)
    print(f"已写入 {out}({total} 只)")

    if args.sample > 0:
        # The slice may be shorter than requested; report the real size.
        sample = all_stocks[: args.sample]
        sample_path = out.parent / f"stocks_{args.sample}.txt"
        with open(sample_path, "w", encoding="utf-8") as f:
            # Name the actual source file rather than assuming the default.
            f.write(f"# 样本 {len(sample)} 只(从 {out.name} 取前 {args.sample} 只)\n")
            f.writelines(f"{s}\n" for s in sample)
        print(f"已写入样本 {sample_path}({len(sample)} 只)")
# Run the fetch only when this file is executed as a script, not on import.
if __name__ == "__main__":
    main()