| |
| """ |
| fetch_stocks.py — 从新浪财经 API 获取全部沪深 A 股股票代码,写入 stocks_all.txt |
| 用法: |
| python3 scripts/fetch_stocks.py # 输出到 stocks_all.txt |
| python3 scripts/fetch_stocks.py -o my_list.txt # 指定输出文件 |
| python3 scripts/fetch_stocks.py --sample 100 # 同时生成 100 只样本 |
| """ |
|
|
| import urllib.request |
| import json |
| import time |
| import sys |
| import argparse |
| from pathlib import Path |
|
|
|
|
def fetch_node(node: str, total: int, delay: float = 0.05) -> list[str]:
    """Fetch the stock symbols of one market node, one page at a time.

    Requests ``ceil(total / 100)`` pages from the Sina ``getHQNodeData``
    endpoint (sorted by symbol, ascending) and collects the ``symbol``
    field of every entry returned. A page that cannot be downloaded or
    parsed is reported on stderr and skipped, so the result may contain
    fewer than ``total`` symbols.

    Args:
        node: Node identifier placed in the ``node`` query parameter.
        total: Expected number of stocks in the node; it is only used to
            derive the page count. Zero or a negative value requests no
            pages at all.
        delay: Seconds to sleep after each page request.

    Returns:
        The symbols in the order the pages returned them.
    """
    per_page = 100
    total_pages = (total + per_page - 1) // per_page  # ceiling division
    print(f" [{node}] {total_pages} 页 × {per_page} 条/页 = 约 {total} 只", flush=True)

    symbols: list[str] = []
    for page in range(1, total_pages + 1):
        url = (
            "http://vip.stock.finance.sina.com.cn/quotes_service/api/json_v2.php"
            f"/Market_Center.getHQNodeData?node={node}&page={page}"
            f"&num={per_page}&sort=symbol&asc=1&dpc=1"
        )
        try:
            with urllib.request.urlopen(url, timeout=15) as r:
                data = json.loads(r.read())
            # A JSON ``null`` body decodes to None. Treat it as a page with
            # no entries instead of letting the iteration below raise
            # TypeError and be misreported as a failed page.
            if data is None:
                data = []
            batch = [d["symbol"] for d in data]
        except Exception as e:
            # Deliberate best effort: any network, decoding or schema error
            # costs only this page; the remaining pages are still fetched.
            print(f" 第 {page} 页失败: {e},跳过", file=sys.stderr)
        else:
            symbols.extend(batch)
            print(f" 第 {page:3d}/{total_pages} 页: {len(batch)} 只", flush=True)
        # Pause between requests regardless of the outcome.
        time.sleep(delay)

    return symbols
|
|
|
|
def get_node_count(node: str) -> int:
    """Return the number of stocks the Sina API reports for ``node``.

    Calls the ``getHQNodeStockCount`` endpoint and parses the response
    body as an integer after stripping surrounding whitespace and double
    quotes.

    Args:
        node: Node identifier placed in the ``node`` query parameter.

    Returns:
        The stock count parsed from the response body.

    Raises:
        ValueError: If the stripped body is not an integer literal.
    """
    endpoint = (
        "http://vip.stock.finance.sina.com.cn/quotes_service/api/json_v2.php"
        "/Market_Center.getHQNodeStockCount"
    )
    with urllib.request.urlopen(f"{endpoint}?node={node}", timeout=10) as response:
        raw = response.read()
    # Drop surrounding whitespace first, then any wrapping double quotes.
    digits = raw.strip().strip(b'"')
    return int(digits)
|
|
|
|
def main():
    """Command-line entry point: fetch all SH/SZ A-share symbols and save them.

    Steps:
      1. Parse ``-o/--output`` (default ``stocks_all.txt``) and ``--sample N``.
      2. Query the reported stock count of the ``sh_a`` and ``sz_a`` nodes.
      3. Fetch both nodes page by page, then merge, de-duplicate and sort.
      4. Write the full list, preceded by two ``#`` header lines, to the
         output file, creating its parent directory when missing.
      5. When ``--sample N`` is positive, also write the first N symbols to
         ``stocks_N.txt`` next to the output file.

    A warning is printed on stderr when the number of symbols fetched for
    a node differs from the count reported for it.
    """
    parser = argparse.ArgumentParser(description="获取沪深 A 股列表")
    parser.add_argument("-o", "--output", default="stocks_all.txt", help="输出文件路径")
    parser.add_argument("--sample", type=int, default=0, help="额外生成前 N 只的样本文件")
    args = parser.parse_args()

    print("正在查询各板块股票数量...")
    sh_count = get_node_count("sh_a")
    sz_count = get_node_count("sz_a")
    print(f" 上证 A 股: {sh_count} 只")
    print(f" 深证 A 股: {sz_count} 只")

    print("\n开始拉取上证 A 股...")
    sh_symbols = fetch_node("sh_a", sh_count)

    print("\n开始拉取深证 A 股...")
    sz_symbols = fetch_node("sz_a", sz_count)

    # fetch_node skips pages that fail, so the list can be silently
    # incomplete; surface any mismatch against the reported counts.
    for label, expected, fetched in (
        ("上证", sh_count, len(sh_symbols)),
        ("深证", sz_count, len(sz_symbols)),
    ):
        if fetched != expected:
            print(
                f"警告: {label} A 股应有 {expected} 只,实际获取 {fetched} 只,列表可能不完整",
                file=sys.stderr,
            )

    all_stocks = sorted(set(sh_symbols + sz_symbols))
    total = len(all_stocks)
    print(f"\n合并去重后共 {total} 只股票")

    out = Path(args.output)
    # Allow ``-o some/new/dir/list.txt`` without a pre-existing directory.
    out.parent.mkdir(parents=True, exist_ok=True)
    with open(out, "w", encoding="utf-8") as f:
        f.write(f"# 沪深 A 股全量列表(上证 {sh_count} + 深证 {sz_count})\n")
        f.write(f"# 共 {total} 只,获取时间: {time.strftime('%Y-%m-%d %H:%M:%S')}\n")
        f.writelines(f"{s}\n" for s in all_stocks)
    print(f"已写入 {out}({total} 只)")

    if args.sample > 0:
        # The slice may be shorter than requested, so report its real
        # length, and name the actual source file rather than the default.
        sample = all_stocks[: args.sample]
        sample_path = out.parent / f"stocks_{args.sample}.txt"
        with open(sample_path, "w", encoding="utf-8") as f:
            f.write(f"# 样本 {len(sample)} 只(从 {out.name} 取前 {len(sample)} 只)\n")
            f.writelines(f"{s}\n" for s in sample)
        print(f"已写入样本 {sample_path}({len(sample)} 只)")
|
|
|
|
# Run the command-line workflow only when this file is executed as a
# script; importing the module must not trigger any network requests.
if __name__ == "__main__":
    main()
|
|