Spaces:
Running
Running
superxuu committed on
Commit ·
d1ec6b7
1
Parent(s): 85640dd
feat: Add Yahoo Finance as fallback data source for overseas environments
Browse files- backend/requirements.txt +1 -0
- backend/scripts/sync_data.py +72 -82
backend/requirements.txt
CHANGED
|
@@ -6,3 +6,4 @@ akshare>=1.18.25
|
|
| 6 |
huggingface_hub>=0.26.0
|
| 7 |
pydantic>=2.10.0
|
| 8 |
python-dotenv>=1.0.0
|
|
|
|
|
|
| 6 |
huggingface_hub>=0.26.0
|
| 7 |
pydantic>=2.10.0
|
| 8 |
python-dotenv>=1.0.0
|
| 9 |
+
yfinance>=0.2.36
|
backend/scripts/sync_data.py
CHANGED
|
@@ -85,117 +85,107 @@ def get_stock_list() -> pd.DataFrame:
|
|
| 85 |
raise
|
| 86 |
|
| 87 |
|
|
|
|
|
|
|
| 88 |
def get_stock_daily(code: str, years: int = YEARS_OF_DATA) -> Optional[pd.DataFrame]:
|
| 89 |
"""
|
| 90 |
获取单只股票的历史日线数据(后复权)
|
| 91 |
-
|
| 92 |
-
Args:
|
| 93 |
-
code: 股票代码
|
| 94 |
-
years: 获取最近几年的数据
|
| 95 |
-
|
| 96 |
-
Returns:
|
| 97 |
-
DataFrame with OHLCV data or None if failed
|
| 98 |
"""
|
|
|
|
| 99 |
try:
|
| 100 |
-
# 计算日期范围
|
| 101 |
end_date = datetime.now().strftime('%Y%m%d')
|
| 102 |
start_date = (datetime.now() - timedelta(days=years * 365)).strftime('%Y%m%d')
|
| 103 |
|
| 104 |
-
# 使用 AkShare 获取后复权数据
|
| 105 |
df = ak.stock_zh_a_hist(
|
| 106 |
symbol=code,
|
| 107 |
period="daily",
|
| 108 |
start_date=start_date,
|
| 109 |
end_date=end_date,
|
| 110 |
-
adjust="hfq"
|
| 111 |
)
|
| 112 |
|
| 113 |
-
if df is None
|
| 114 |
-
|
| 115 |
-
|
| 116 |
-
|
| 117 |
-
|
| 118 |
-
|
| 119 |
-
'
|
| 120 |
-
'
|
| 121 |
-
'
|
| 122 |
-
|
| 123 |
-
|
| 124 |
-
|
| 125 |
-
|
| 126 |
-
|
| 127 |
-
|
| 128 |
-
|
| 129 |
-
# 添加股票代码
|
| 130 |
-
df['code'] = code
|
| 131 |
-
|
| 132 |
-
# 转换日期格式
|
| 133 |
-
df['trade_date'] = pd.to_datetime(df['trade_date'])
|
| 134 |
-
|
| 135 |
-
# 选择需要的列
|
| 136 |
-
columns = ['code', 'trade_date', 'open', 'high', 'low', 'close', 'volume', 'amount', 'pct_chg', 'turnover_rate']
|
| 137 |
-
df = df[[col for col in columns if col in df.columns]]
|
| 138 |
|
| 139 |
-
|
|
|
|
|
|
|
| 140 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 141 |
except Exception as e:
|
| 142 |
-
logger.
|
| 143 |
-
|
|
|
|
| 144 |
|
| 145 |
|
| 146 |
def get_index_daily(code: str, years: int = YEARS_OF_DATA) -> Optional[pd.DataFrame]:
|
| 147 |
"""
|
| 148 |
获取指数的历史日线数据
|
| 149 |
-
|
| 150 |
-
Args:
|
| 151 |
-
code: 指数代码(如 '000300' 沪深300)
|
| 152 |
-
years: 获取最近几年的数据
|
| 153 |
-
|
| 154 |
-
Returns:
|
| 155 |
-
DataFrame with OHLCV data or None if failed
|
| 156 |
"""
|
|
|
|
| 157 |
try:
|
| 158 |
-
# 计算日期范围
|
| 159 |
end_date = datetime.now().strftime('%Y%m%d')
|
| 160 |
start_date = (datetime.now() - timedelta(days=years * 365)).strftime('%Y%m%d')
|
| 161 |
-
|
| 162 |
-
# 使用 AkShare 获取指数数据
|
| 163 |
df = ak.stock_zh_index_daily(symbol=f"sh{code}")
|
| 164 |
-
|
| 165 |
-
|
| 166 |
-
|
| 167 |
-
|
| 168 |
-
|
| 169 |
-
|
| 170 |
-
|
| 171 |
-
|
| 172 |
-
|
| 173 |
-
df = df.rename(columns={
|
| 174 |
-
'date': 'trade_date',
|
| 175 |
-
'open': 'open',
|
| 176 |
-
'high': 'high',
|
| 177 |
-
'low': 'low',
|
| 178 |
-
'close': 'close',
|
| 179 |
-
'volume': 'volume',
|
| 180 |
-
})
|
| 181 |
-
|
| 182 |
-
# 添加代码
|
| 183 |
-
df['code'] = code
|
| 184 |
-
|
| 185 |
-
# 添加缺失的列
|
| 186 |
-
df['amount'] = 0
|
| 187 |
-
df['pct_chg'] = df['close'].pct_change() * 100
|
| 188 |
-
df['turnover_rate'] = 0
|
| 189 |
-
|
| 190 |
-
# 选择需要的列
|
| 191 |
-
columns = ['code', 'trade_date', 'open', 'high', 'low', 'close', 'volume', 'amount', 'pct_chg', 'turnover_rate']
|
| 192 |
-
df = df[[col for col in columns if col in df.columns]]
|
| 193 |
-
|
| 194 |
-
return df
|
| 195 |
-
|
| 196 |
except Exception as e:
|
| 197 |
-
logger.
|
| 198 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 199 |
|
| 200 |
|
| 201 |
def sync_stock_list(df: pd.DataFrame) -> None:
|
|
|
|
| 85 |
raise
|
| 86 |
|
| 87 |
|
| 88 |
+
import yfinance as yf
|
| 89 |
+
|
| 90 |
def get_stock_daily(code: str, years: int = YEARS_OF_DATA) -> Optional[pd.DataFrame]:
    """
    Fetch daily history for a single stock.

    AkShare is tried first (requested with ``adjust="hfq"``). If it raises or
    returns no rows, Yahoo Finance is used as a fallback for environments
    where the AkShare upstream is unreachable.

    Args:
        code: Stock code without an exchange suffix, e.g. ``"600519"``.
        years: How many years of history to request, counted as
            ``years * 365`` days back from today.

    Returns:
        DataFrame with the columns ``code, trade_date, open, high, low,
        close, volume, amount, pct_chg, turnover_rate``, or ``None`` when
        both sources fail or return no rows.
    """
    columns = ['code', 'trade_date', 'open', 'high', 'low', 'close', 'volume', 'amount', 'pct_chg', 'turnover_rate']

    # Both sources are queried for the same calendar window.
    now = datetime.now()
    window_start = now - timedelta(days=years * 365)

    # 1. Try AkShare (domestic source)
    try:
        df = ak.stock_zh_a_hist(
            symbol=code,
            period="daily",
            start_date=window_start.strftime('%Y%m%d'),
            end_date=now.strftime('%Y%m%d'),
            adjust="hfq",
        )

        if df is not None and not df.empty:
            # AkShare returns Chinese column headers; map them to the schema used here.
            df = df.rename(columns={
                '日期': 'trade_date', '开盘': 'open', '最高': 'high', '最低': 'low',
                '收盘': 'close', '成交量': 'volume', '成交额': 'amount',
                '涨跌幅': 'pct_chg', '换手率': 'turnover_rate'
            })
            df['code'] = code
            df['trade_date'] = pd.to_datetime(df['trade_date'])
            return df[columns]

        # An empty result is not an exception, so say explicitly why we fall back.
        logger.warning(f"AkShare returned no data for {code}, trying Yahoo Finance")
    except Exception as e:
        logger.warning(f"AkShare failed for {code}, trying Yahoo Finance: {e}")

    # 2. Try Yahoo Finance (overseas source)
    try:
        # Convert the code format: 600519 -> 600519.SS, 000001 -> 000001.SZ
        yf_code = f"{code}.SS" if code.startswith(('6', '9')) else f"{code}.SZ"

        ticker = yf.Ticker(yf_code)
        # Use an explicit start/end window instead of period=f"{years}y":
        # arbitrary values such as "3y" are not among yfinance's documented
        # period choices. ``end`` is exclusive, hence the extra day.
        # NOTE(review): auto_adjust=True yields Yahoo's adjusted prices, which
        # are presumably not on the same basis as AkShare's "hfq" series —
        # confirm that mixing the two sources is acceptable downstream.
        df_yf = ticker.history(
            start=window_start.strftime('%Y-%m-%d'),
            end=(now + timedelta(days=1)).strftime('%Y-%m-%d'),
            interval="1d",
            auto_adjust=True,
        )

        if df_yf is not None and not df_yf.empty:
            # The date is the index of the Yahoo frame; turn it into a column.
            df_yf = df_yf.reset_index()
            df_yf = df_yf.rename(columns={
                'Date': 'trade_date', 'Open': 'open', 'High': 'high', 'Low': 'low',
                'Close': 'close', 'Volume': 'volume'
            })
            df_yf['code'] = code
            # Drop the timezone so trade_date is naive, like the AkShare path.
            df_yf['trade_date'] = pd.to_datetime(df_yf['trade_date']).dt.tz_localize(None)
            # NOTE(review): Yahoo volume may be in a different unit than the
            # AkShare volume column — verify before comparing across sources.
            df_yf['amount'] = df_yf['close'] * df_yf['volume']  # estimated turnover amount
            df_yf['pct_chg'] = df_yf['close'].pct_change() * 100  # first row is NaN
            df_yf['turnover_rate'] = 0  # Yahoo does not provide a turnover rate

            return df_yf[columns]

        logger.error(f"Yahoo Finance returned no data for {code}")
    except Exception as e:
        logger.error(f"Yahoo Finance also failed for {code}: {e}")

    return None
|
| 146 |
|
| 147 |
|
| 148 |
def get_index_daily(code: str, years: int = YEARS_OF_DATA) -> Optional[pd.DataFrame]:
    """
    Fetch daily history for an index.

    AkShare is tried first; if it raises or returns no rows, Yahoo Finance is
    used as a fallback for environments where AkShare is unreachable.

    Args:
        code: Index code without an exchange prefix/suffix, e.g. ``"000300"``
            (CSI 300). Codes starting with ``399`` are treated as Shenzhen
            indices; every other code is treated as Shanghai.
        years: How many years of history to return, counted as
            ``years * 365`` days back from today.

    Returns:
        DataFrame with the columns ``code, trade_date, open, high, low,
        close, volume, amount, pct_chg, turnover_rate`` (``amount`` and
        ``turnover_rate`` are filled with 0), or ``None`` when both sources
        fail or Yahoo returns no rows.
    """
    columns = ['code', 'trade_date', 'open', 'high', 'low', 'close', 'volume', 'amount', 'pct_chg', 'turnover_rate']

    now = datetime.now()
    window_start = now - timedelta(days=years * 365)

    # Previously the exchange was hard-coded to Shanghai, so 399xxx indices
    # could not be resolved by either source.
    is_shenzhen = code.startswith('399')

    # 1. Try AkShare
    try:
        ak_symbol = f"sz{code}" if is_shenzhen else f"sh{code}"
        df = ak.stock_zh_index_daily(symbol=ak_symbol)
        if df is not None and not df.empty:
            df['date'] = pd.to_datetime(df['date'])
            df = df.sort_values('date')
            # Compute the daily change on the full history BEFORE trimming, so
            # the first row of the window gets a real value instead of NaN.
            df['pct_chg'] = df['close'].pct_change() * 100

            # The endpoint returns the whole history; keep only the requested
            # window. Compare against Timestamps (at midnight, matching the
            # original day-granular bounds) and copy to avoid writing into a
            # view of the unfiltered frame.
            first_day = pd.Timestamp(window_start.date())
            last_day = pd.Timestamp(now.date())
            df = df[(df['date'] >= first_day) & (df['date'] <= last_day)].copy()

            df = df.rename(columns={'date': 'trade_date'})
            df['code'] = code
            df['amount'] = 0
            df['turnover_rate'] = 0
            return df[columns]

        # An empty result is not an exception, so say explicitly why we fall back.
        logger.warning(f"AkShare index returned no data for {code}, trying Yahoo Finance")
    except Exception as e:
        logger.warning(f"AkShare index failed for {code}, trying Yahoo Finance: {e}")

    # 2. Try Yahoo Finance
    try:
        # e.g. CSI 300 is 000300.SS on Yahoo; Shenzhen indices use the .SZ suffix.
        yf_code = f"{code}.SZ" if is_shenzhen else f"{code}.SS"
        ticker = yf.Ticker(yf_code)
        # Use an explicit start/end window instead of period=f"{years}y":
        # arbitrary values such as "3y" are not among yfinance's documented
        # period choices. ``end`` is exclusive, hence the extra day.
        df_yf = ticker.history(
            start=window_start.strftime('%Y-%m-%d'),
            end=(now + timedelta(days=1)).strftime('%Y-%m-%d'),
            interval="1d",
        )
        if df_yf is not None and not df_yf.empty:
            # The date is the index of the Yahoo frame; turn it into a column.
            df_yf = df_yf.reset_index()
            df_yf = df_yf.rename(columns={'Date': 'trade_date', 'Open': 'open', 'High': 'high', 'Low': 'low', 'Close': 'close', 'Volume': 'volume'})
            df_yf['code'] = code
            # Drop the timezone so trade_date is naive, like the AkShare path.
            df_yf['trade_date'] = pd.to_datetime(df_yf['trade_date']).dt.tz_localize(None)
            df_yf['amount'] = 0
            df_yf['pct_chg'] = df_yf['close'].pct_change() * 100  # first row is NaN
            df_yf['turnover_rate'] = 0
            return df_yf[columns]

        logger.error(f"Yahoo Finance index returned no data for {code}")
    except Exception as e:
        logger.error(f"Yahoo Finance index also failed for {code}: {e}")

    return None
|
| 189 |
|
| 190 |
|
| 191 |
def sync_stock_list(df: pd.DataFrame) -> None:
|