superxu520 commited on
Commit
d8438e3
·
1 Parent(s): e6d501f

"feat:integrate-baostock-and-optimize-trading-day-logic"

Browse files
Files changed (3) hide show
  1. app/baostock_adapter.py +219 -0
  2. requirements.txt +1 -0
  3. sync_data.py +80 -23
app/baostock_adapter.py ADDED
@@ -0,0 +1,219 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """
2
+ BaoStock适配器 - 替代AkShare的部分接口
3
+ """
4
+
5
+ import logging
6
+ from typing import Optional, List, Dict, Any
7
+ import pandas as pd
8
+
9
+ logger = logging.getLogger(__name__)
10
+
11
+ # 尝试导入baostock
12
+ try:
13
+ import baostock as bs
14
+ BAOSTOCK_AVAILABLE = True
15
+ except ImportError:
16
+ BAOSTOCK_AVAILABLE = False
17
+ logger.warning("BaoStock not installed, falling back to AkShare")
18
+
19
+
20
+ class BaoStockAdapter:
21
+ """BaoStock适配器类"""
22
+
23
+ def __init__(self):
24
+ self._logged_in = False
25
+
26
+ def login(self) -> bool:
27
+ """登录BaoStock"""
28
+ if not BAOSTOCK_AVAILABLE:
29
+ return False
30
+
31
+ try:
32
+ result = bs.login()
33
+ if result.error_code == '0':
34
+ self._logged_in = True
35
+ logger.info("BaoStock login success")
36
+ return True
37
+ else:
38
+ logger.error(f"BaoStock login failed: {result.error_msg}")
39
+ return False
40
+ except Exception as e:
41
+ logger.error(f"BaoStock login error: {e}")
42
+ return False
43
+
44
+ def logout(self):
45
+ """登出BaoStock"""
46
+ if self._logged_in and BAOSTOCK_AVAILABLE:
47
+ bs.logout()
48
+ self._logged_in = False
49
+
50
+ def get_stock_list(self) -> Optional[pd.DataFrame]:
51
+ """
52
+ 获取A股股票列表
53
+ 替代: ak.stock_info_a_code_name()
54
+
55
+ Returns:
56
+ DataFrame with columns: ['code', 'name']
57
+ """
58
+ if not BAOSTOCK_AVAILABLE:
59
+ return None
60
+
61
+ if not self._logged_in:
62
+ if not self.login():
63
+ return None
64
+
65
+ try:
66
+ # 获取所有A股 - 使用query_all_stock
67
+ # 注意:BaoStock的query_all_stock返回格式特殊
68
+ rs = bs.query_all_stock(day="2024-03-15")
69
+ if rs.error_code != '0':
70
+ logger.error(f"BaoStock query_all_stock failed: {rs.error_msg}")
71
+ return None
72
+
73
+ data_list = []
74
+ codes_seen = set()
75
+
76
+ while rs.next():
77
+ row = rs.get_row_data()
78
+ # row格式: [code, tradeStatus, code_name]
79
+ code = row[0] # 如 sh.600000
80
+ # row[1] 是 tradeStatus (交易状态)
81
+ name = row[2] # 股票名称
82
+
83
+ # 过滤:只保留A股(code包含sh或sz)
84
+ # 过滤掉指数(以000/399开头的通常是指数)
85
+ code_short = code.replace('.', '')
86
+ is_index = (
87
+ code_short.startswith('sh000') or
88
+ code_short.startswith('sz399') or
89
+ code_short.startswith('sh88') or # 科创板指数
90
+ code_short.startswith('sh89') # 其他指数
91
+ )
92
+
93
+ if code.startswith(('sh.', 'sz.')) and not is_index and code not in codes_seen:
94
+ codes_seen.add(code)
95
+ data_list.append({
96
+ 'code': code_short,
97
+ 'name': name
98
+ })
99
+
100
+ df = pd.DataFrame(data_list)
101
+ logger.info(f"BaoStock get_stock_list: {len(df)} stocks")
102
+ return df
103
+
104
+ except Exception as e:
105
+ logger.error(f"BaoStock get_stock_list error: {e}")
106
+ return None
107
+
108
+ def get_stock_daily(self, code: str, start_date: str, end_date: str) -> Optional[pd.DataFrame]:
109
+ """
110
+ 获取个股历史日K数据
111
+ 替代: ak.stock_zh_a_hist()
112
+
113
+ Args:
114
+ code: 股票代码 (如 'sh600000' 或 'sz000001')
115
+ start_date: 开始日期 (YYYYMMDD)
116
+ end_date: 结束日期 (YYYYMMDD)
117
+
118
+ Returns:
119
+ DataFrame with columns compatible with existing code
120
+ """
121
+ if not BAOSTOCK_AVAILABLE:
122
+ return None
123
+
124
+ if not self._logged_in:
125
+ if not self.login():
126
+ return None
127
+
128
+ try:
129
+ # 转换代码格式: sh600000 -> sh.600000
130
+ if '.' not in code:
131
+ if code.startswith('sh') or code.startswith('sz'):
132
+ code = code[:2] + '.' + code[2:]
133
+
134
+ # BaoStock日期格式: YYYY-MM-DD
135
+ start = f"{start_date[:4]}-{start_date[4:6]}-{start_date[6:]}"
136
+ end = f"{end_date[:4]}-{end_date[4:6]}-{end_date[6:]}"
137
+
138
+ # 查询数据
139
+ # fields参数说明: date,code,open,high,low,close,preclose,volume,amount,turn,tradestatus,pctChg,isST
140
+ rs = bs.query_history_k_data_plus(
141
+ code,
142
+ "date,code,open,high,low,close,preclose,volume,amount,turn,pctChg",
143
+ start_date=start,
144
+ end_date=end,
145
+ frequency="d",
146
+ adjustflag="3" # 复权类型: 1后���权 2前复权 3不复权
147
+ )
148
+
149
+ if rs.error_code != '0':
150
+ logger.warning(f"BaoStock query_history_k_data_plus failed for {code}: {rs.error_msg}")
151
+ return None
152
+
153
+ data_list = []
154
+ while rs.next():
155
+ row = rs.get_row_data()
156
+ data_list.append({
157
+ 'trade_date': pd.to_datetime(row[0]),
158
+ 'code': code.replace('.', ''),
159
+ 'open': float(row[2]) if row[2] else None,
160
+ 'high': float(row[3]) if row[3] else None,
161
+ 'low': float(row[4]) if row[4] else None,
162
+ 'close': float(row[5]) if row[5] else None,
163
+ 'preclose': float(row[6]) if row[6] else None,
164
+ 'volume': float(row[7]) if row[7] else None,
165
+ 'amount': float(row[8]) if row[8] else None,
166
+ 'turnover': float(row[9]) if row[9] else None,
167
+ 'pct_change': float(row[10]) if row[10] else None,
168
+ })
169
+
170
+ if not data_list:
171
+ return None
172
+
173
+ df = pd.DataFrame(data_list)
174
+ logger.debug(f"BaoStock get_stock_daily for {code}: {len(df)} records")
175
+ return df
176
+
177
+ except Exception as e:
178
+ logger.warning(f"BaoStock get_stock_daily error for {code}: {e}")
179
+ return None
180
+
181
+ def get_index_daily(self, code: str, start_date: str, end_date: str) -> Optional[pd.DataFrame]:
182
+ """
183
+ 获取指数历史日K数据
184
+ 替代: ak.stock_zh_index_daily_em()
185
+
186
+ Args:
187
+ code: 指数代码 (如 'sh000300' 沪深300)
188
+ """
189
+ # 指数和个股使用相同的接口
190
+ return self.get_stock_daily(code, start_date, end_date)
191
+
192
+
193
+ # 全局适配器实例
194
+ _baostock_adapter = None
195
+
196
+ def get_baostock_adapter() -> BaoStockAdapter:
197
+ """获取BaoStock适配器实例(单例)"""
198
+ global _baostock_adapter
199
+ if _baostock_adapter is None:
200
+ _baostock_adapter = BaoStockAdapter()
201
+ return _baostock_adapter
202
+
203
+
204
+ def get_stock_list_baostock() -> Optional[pd.DataFrame]:
205
+ """
206
+ 获取股票列表(BaoStock版本)
207
+ 如果BaoStock不可用,返回None,调用方应回退到AkShare
208
+ """
209
+ adapter = get_baostock_adapter()
210
+ return adapter.get_stock_list()
211
+
212
+
213
+ def get_stock_daily_baostock(code: str, start_date: str, end_date: str) -> Optional[pd.DataFrame]:
214
+ """
215
+ 获取个股日K数据(BaoStock版本)
216
+ 如果BaoStock不可用,返回None
217
+ """
218
+ adapter = get_baostock_adapter()
219
+ return adapter.get_stock_daily(code, start_date, end_date)
requirements.txt CHANGED
@@ -1,5 +1,6 @@
1
  # 同步 Space 专用依赖
2
  akshare>=1.12.0
 
3
  pandas>=2.0.0
4
  duckdb>=0.9.0
5
  huggingface-hub>=0.20.0
 
1
  # 同步 Space 专用依赖
2
  akshare>=1.12.0
3
+ baostock>=0.8.8 # 替代AkShare部分接口
4
  pandas>=2.0.0
5
  duckdb>=0.9.0
6
  huggingface-hub>=0.20.0
sync_data.py CHANGED
@@ -18,6 +18,9 @@ import pandas as pd
18
  import akshare as ak
19
  from huggingface_hub import hf_hub_download, upload_file, list_repo_files
20
 
 
 
 
21
  # 添加当前目录到路径
22
  sys.path.insert(0, os.path.dirname(os.path.abspath(__file__)))
23
 
@@ -103,25 +106,41 @@ def get_stock_list() -> pd.DataFrame:
103
  logger.info("Fetching all-market target list...")
104
  all_lists = []
105
 
106
- # A股列表获取(带重试和指数退
107
  max_retries = 5
108
  base_delay = 2.0
109
- for attempt in range(max_retries):
 
 
110
  try:
111
- # 使用 stock_info_a_code_name() 替代 stock_zh_a_spot_em(),更稳定
112
- df_a = ak.stock_info_a_code_name()
113
- df_a.columns = ['code', 'name']
114
- df_a['market'] = df_a['code'].apply(lambda x: '主板' if x.startswith(('60', '00')) else ('创业板' if x.startswith('30') else ('科创板' if x.startswith('68') else ('北交所' if x.startswith(('8', '4', '920')) else '其他'))))
115
- all_lists.append(df_a)
116
- logger.info(f"A-stock list fetched: {len(df_a)} stocks")
117
- break # 成功则退出重试循环
118
- except Exception as e:
119
- if attempt == max_retries - 1:
120
- logger.error(f"Failed to fetch A-stock list after {max_retries} attempts: {e}")
121
  else:
122
- delay = base_delay * (2 ** attempt)
123
- logger.warning(f"Attempt {attempt + 1} failed, retrying in {delay}s... Error: {e}")
124
- time.sleep(delay)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
125
 
126
  # ETF (增加容错)
127
  try:
@@ -292,7 +311,11 @@ def get_target_daily(code: str, start_date: str, market: str) -> Optional[pd.Dat
292
  fetch_start = start_date.replace('-', '')
293
  df = None
294
  if market == 'INDEX':
295
- df = ak.stock_zh_index_daily_em(symbol=f"sh{code}" if code.startswith('000') else f"sz{code}")
 
 
 
 
296
  elif market == 'ETF':
297
  df = ak.fund_etf_hist_em(symbol=code, period="daily", start_date=fetch_start, end_date=end_date, adjust="hfq")
298
  elif market == 'LOF':
@@ -306,7 +329,16 @@ def get_target_daily(code: str, start_date: str, market: str) -> Optional[pd.Dat
306
  elif market == 'REITs':
307
  df = ak.reits_hist_em(symbol=code)
308
  else:
309
- df = ak.stock_zh_a_hist(symbol=code, period="daily", start_date=fetch_start, end_date=end_date, adjust="hfq")
 
 
 
 
 
 
 
 
 
310
 
311
  if df is not None and not df.empty:
312
  # 标准化列名
@@ -340,16 +372,33 @@ def get_target_daily(code: str, start_date: str, market: str) -> Optional[pd.Dat
340
  return None
341
 
342
  def get_last_trading_day() -> str:
343
- """获取最近一个交易日(优先使用交易日历)"""
 
 
 
 
 
 
 
 
344
  try:
345
  # 获取交易日历(新浪接口,包含未来日期)
346
  df = ak.tool_trade_date_hist_sina()
347
  if df is not None and not df.empty:
348
- # 转换为日期格式并过滤出 <= 今天的日期
349
  df['trade_date'] = pd.to_datetime(df['trade_date']).dt.date
350
- today = get_beijing_time().date()
351
- # 找到最后一个交易日
352
- last_day = df[df['trade_date'] <= today]['trade_date'].iloc[-1]
 
 
 
 
 
 
 
 
 
353
  return last_day.strftime('%Y-%m-%d')
354
  except Exception as e:
355
  logger.warning(f"Failed to get trading calendar: {e}")
@@ -360,11 +409,19 @@ def get_last_trading_day() -> str:
360
  if df is not None and not df.empty:
361
  date_col = 'date' if 'date' in df.columns else ('日期' if '日期' in df.columns else None)
362
  if date_col:
363
- return pd.to_datetime(df[date_col].iloc[-1]).strftime('%Y-%m-%d')
 
 
 
 
 
364
  except Exception: pass
365
 
366
  # 最终回退:按工作日估算
367
  d = get_beijing_time()
 
 
 
368
  while d.weekday() >= 5:
369
  d -= timedelta(days=1)
370
  return d.strftime('%Y-%m-%d')
 
18
  import akshare as ak
19
  from huggingface_hub import hf_hub_download, upload_file, list_repo_files
20
 
21
+ # BaoStock适配器(优先使用,更稳定)
22
+ from app.baostock_adapter import get_stock_list_baostock, get_stock_daily_baostock, BAOSTOCK_AVAILABLE
23
+
24
  # 添加当前目录到路径
25
  sys.path.insert(0, os.path.dirname(os.path.abspath(__file__)))
26
 
 
106
  logger.info("Fetching all-market target list...")
107
  all_lists = []
108
 
109
+ # A股列表获取(优先使用BaoStock,失败则回退到AkShare
110
  max_retries = 5
111
  base_delay = 2.0
112
+
113
+ # 尝试使用BaoStock获取A股列表
114
+ if BAOSTOCK_AVAILABLE:
115
  try:
116
+ df_a = get_stock_list_baostock()
117
+ if df_a is not None and len(df_a) > 0:
118
+ df_a['market'] = df_a['code'].apply(lambda x: '主板' if x.startswith(('60', '00')) else ('创业板' if x.startswith('30') else ('科创板' if x.startswith('68') else ('北交所' if x.startswith(('8', '4', '920')) else '其他'))))
119
+ all_lists.append(df_a)
120
+ logger.info(f"A-stock list fetched from BaoStock: {len(df_a)} stocks")
 
 
 
 
 
121
  else:
122
+ raise Exception("BaoStock returned empty data")
123
+ except Exception as e:
124
+ logger.warning(f"BaoStock get_stock_list failed: {e}, falling back to AkShare")
125
+
126
+ # 如果BaoStock失败或不可用,使用AkShare
127
+ if not all_lists:
128
+ for attempt in range(max_retries):
129
+ try:
130
+ # 使用 stock_info_a_code_name() 替代 stock_zh_a_spot_em(),更稳定
131
+ df_a = ak.stock_info_a_code_name()
132
+ df_a.columns = ['code', 'name']
133
+ df_a['market'] = df_a['code'].apply(lambda x: '主板' if x.startswith(('60', '00')) else ('创业板' if x.startswith('30') else ('科创板' if x.startswith('68') else ('北交所' if x.startswith(('8', '4', '920')) else '其他'))))
134
+ all_lists.append(df_a)
135
+ logger.info(f"A-stock list fetched from AkShare: {len(df_a)} stocks")
136
+ break # 成功则退出重试循环
137
+ except Exception as e:
138
+ if attempt == max_retries - 1:
139
+ logger.error(f"Failed to fetch A-stock list after {max_retries} attempts: {e}")
140
+ else:
141
+ delay = base_delay * (2 ** attempt)
142
+ logger.warning(f"Attempt {attempt + 1} failed, retrying in {delay}s... Error: {e}")
143
+ time.sleep(delay)
144
 
145
  # ETF (增加容错)
146
  try:
 
311
  fetch_start = start_date.replace('-', '')
312
  df = None
313
  if market == 'INDEX':
314
+ # 指数:优先使用BaoStock,失败则回退AkShare
315
+ if BAOSTOCK_AVAILABLE:
316
+ df = get_stock_daily_baostock(f"sh{code}" if code.startswith('000') else f"sz{code}", fetch_start, end_date)
317
+ if df is None:
318
+ df = ak.stock_zh_index_daily_em(symbol=f"sh{code}" if code.startswith('000') else f"sz{code}")
319
  elif market == 'ETF':
320
  df = ak.fund_etf_hist_em(symbol=code, period="daily", start_date=fetch_start, end_date=end_date, adjust="hfq")
321
  elif market == 'LOF':
 
329
  elif market == 'REITs':
330
  df = ak.reits_hist_em(symbol=code)
331
  else:
332
+ # A股个股:优先使用BaoStock,失败则回退AkShare
333
+ if BAOSTOCK_AVAILABLE:
334
+ # BaoStock代码格式: sh600000, sz000001
335
+ if code.startswith('6'):
336
+ bs_code = f"sh{code}"
337
+ else:
338
+ bs_code = f"sz{code}"
339
+ df = get_stock_daily_baostock(bs_code, fetch_start, end_date)
340
+ if df is None:
341
+ df = ak.stock_zh_a_hist(symbol=code, period="daily", start_date=fetch_start, end_date=end_date, adjust="hfq")
342
 
343
  if df is not None and not df.empty:
344
  # 标准化列名
 
372
  return None
373
 
374
  def get_last_trading_day() -> str:
375
+ """获取最近一个交易日(优先使用交易日历)
376
+
377
+ 注意:由于数据在收盘后获取,如果今天是交易日但当前时间 < 20:00,
378
+ 则返回前一个交易日,避免获取不完整的当天数据。
379
+ """
380
+ now = get_beijing_time()
381
+ today = now.date()
382
+ current_hour = now.hour
383
+
384
  try:
385
  # 获取交易日历(新浪接口,包含未来日期)
386
  df = ak.tool_trade_date_hist_sina()
387
  if df is not None and not df.empty:
388
+ # 转换为日期格式
389
  df['trade_date'] = pd.to_datetime(df['trade_date']).dt.date
390
+
391
+ # 找到今天的交易日(如果今天是交易日)
392
+ today_is_trading_day = today in df['trade_date'].values
393
+
394
+ # 如果今天是交易日但当前时间 < 20:00,则排除今天
395
+ if today_is_trading_day and current_hour < 20:
396
+ last_day = df[df['trade_date'] < today]['trade_date'].iloc[-1]
397
+ logger.info(f"Today is trading day but current time is {now.strftime('%H:%M')} (< 20:00), using previous trading day: {last_day}")
398
+ else:
399
+ # 否则使用 <= 今天的最后一个交易日
400
+ last_day = df[df['trade_date'] <= today]['trade_date'].iloc[-1]
401
+
402
  return last_day.strftime('%Y-%m-%d')
403
  except Exception as e:
404
  logger.warning(f"Failed to get trading calendar: {e}")
 
409
  if df is not None and not df.empty:
410
  date_col = 'date' if 'date' in df.columns else ('日期' if '日期' in df.columns else None)
411
  if date_col:
412
+ last_date = pd.to_datetime(df[date_col].iloc[-1]).date()
413
+ # 同样检查时间条件
414
+ if last_date == today and current_hour < 20:
415
+ # 返回前一天的数据
416
+ return pd.to_datetime(df[date_col].iloc[-2]).strftime('%Y-%m-%d')
417
+ return last_date.strftime('%Y-%m-%d')
418
  except Exception: pass
419
 
420
  # 最终回退:按工作日估算
421
  d = get_beijing_time()
422
+ # 如果当前时间 < 20:00,从昨天开始查找
423
+ if current_hour < 20:
424
+ d -= timedelta(days=1)
425
  while d.weekday() >= 5:
426
  d -= timedelta(days=1)
427
  return d.strftime('%Y-%m-%d')