Spaces:

superxu520
/

sync_stock

Paused

File size: 84,933 Bytes

86f4c82
 
 
 
 
 
 
 
 
c636e4b
1004573
86f4c82
 
 
 
 
 
 
844dc5e
86f4c82
c4a2fde
 
 
 
 
 
 
 
 
91985a9
 
 
 
 
 
d8438e3
86f4c82
 
 
 
 
42c3910
b47f655
86f4c82
 
 
 
 
 
 
 
 
aa468bc
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
e6d501f
aa468bc
e6d501f
 
 
 
 
 
aa468bc
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
86f4c82
 
b47f655
 
 
 
 
 
 
86f4c82
 
c83976a
c4a2fde
2d86dae
 
d8438e3
c4a2fde
 
c83976a
c4a2fde
d8438e3
 
c4a2fde
c83976a
c4a2fde
d8438e3
c4a2fde
d8438e3
c4a2fde
d8438e3
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
86f4c82
c6a3487
 
0636b12
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
c6a3487
 
86f4c82
c6a3487
 
0636b12
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
c6a3487
 
86f4c82
c6a3487
 
0636b12
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
c6a3487
 
 
 
 
 
0636b12
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
ad9edf1
 
 
 
 
 
 
 
 
 
 
 
 
 
 
0636b12
 
 
 
 
 
 
86f4c82
c6a3487
86f4c82
 
 
 
 
 
 
b47f655
 
 
 
86f4c82
 
 
 
2d86dae
 
 
86f4c82
 
2d86dae
 
 
 
86f4c82
 
 
 
91985a9
 
 
 
 
d8438e3
 
86f4c82
 
 
 
 
6001f15
 
 
 
 
86f4c82
 
 
c4a2fde
 
 
 
 
 
91985a9
e9ae0e6
91985a9
d8438e3
c4a2fde
d8438e3
86f4c82
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
c349d48
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
86f4c82
c349d48
d8438e3
c349d48
 
 
 
d8438e3
 
 
c349d48
d8438e3
c349d48
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
aa468bc
c349d48
86f4c82
 
 
 
 
d8438e3
 
 
 
 
 
aa468bc
86f4c82
aa468bc
86f4c82
d8438e3
 
 
86f4c82
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
42c3910
3d1ce67
 
86f4c82
3d1ce67
 
 
86f4c82
3d1ce67
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
86f4c82
3d1ce67
86f4c82
 
 
aa468bc
 
 
 
 
 
42c3910
 
 
86f4c82
 
 
42c3910
 
 
aa468bc
86f4c82
 
42c3910
86f4c82
42c3910
 
 
 
 
86f4c82
 
3b36cbf
 
86f4c82
 
1004573
 
42c3910
86f4c82
 
 
 
 
 
 
 
 
aa468bc
 
 
86f4c82
aa468bc
 
 
 
 
 
 
 
 
 
 
 
 
 
 
86f4c82
aa468bc
 
1004573
 
aa468bc
 
 
 
3b36cbf
aa468bc
1004573
 
 
 
 
 
42c3910
 
86f4c82
42c3910
 
 
 
 
3b36cbf
 
42c3910
86f4c82
aa468bc
 
 
 
c334682
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
2d86dae
aa468bc
 
 
 
 
 
 
 
 
 
 
 
c334682
aa468bc
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
c334682
 
 
aa468bc
2d86dae
 
 
c334682
aa468bc
 
 
42c3910
 
aa468bc
86f4c82
ad9edf1
 
 
 
aa468bc
 
ad9edf1
 
 
aa468bc
ad9edf1
 
 
 
 
 
 
 
 
 
 
aa468bc
 
 
ad9edf1
 
a50e136
 
 
ad9edf1
 
 
 
 
 
 
 
 
 
 
 
 
 
aa468bc
ad9edf1
 
aa468bc
ad9edf1
aa468bc
 
 
ad9edf1
aa468bc
 
42c3910
aa468bc
 
 
 
 
 
 
 
 
 
42c3910
 
aa468bc
 
 
 
42c3910
aa468bc
 
 
 
 
 
42c3910
 
 
aa468bc
42c3910
aa468bc
42c3910
3b36cbf
 
ad9edf1
aa468bc
 
1004573
 
42c3910
ad9edf1
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1004573
 
ad9edf1
 
 
 
3b36cbf
ad9edf1
1004573
 
ad9edf1
 
 
 
1004573
 
 
aa468bc
42c3910
 
 
 
 
3b36cbf
 
42c3910
aa468bc
 
 
 
 
 
2d86dae
 
aa468bc
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
2d86dae
 
 
aa468bc
 
 
3b36cbf
ad9edf1
aa468bc
 
ad9edf1
 
 
 
aa468bc
 
ad9edf1
 
 
aa468bc
ad9edf1
 
 
 
 
 
 
 
 
 
 
aa468bc
 
 
ad9edf1
a50e136
 
 
ad9edf1
 
 
 
 
 
 
 
 
 
 
 
 
aa468bc
ad9edf1
 
aa468bc
ad9edf1
aa468bc
 
 
ad9edf1
aa468bc
 
3b36cbf
aa468bc
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
3b36cbf
 
 
ad9edf1
aa468bc
 
1004573
 
3b36cbf
ad9edf1
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1004573
 
ad9edf1
 
 
 
3b36cbf
ad9edf1
1004573
 
ad9edf1
 
 
 
1004573
 
 
aa468bc
3b36cbf
 
 
 
 
 
 
aa468bc
 
 
 
 
 
2d86dae
 
aa468bc
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
2d86dae
 
 
aa468bc
 
 
3b36cbf
ad9edf1
aa468bc
 
ad9edf1
 
 
 
aa468bc
 
ad9edf1
 
 
aa468bc
ad9edf1
 
 
 
 
 
 
 
 
 
 
aa468bc
 
 
ad9edf1
a50e136
 
 
ad9edf1
 
 
 
 
 
 
 
 
 
 
 
 
aa468bc
ad9edf1
 
aa468bc
ad9edf1
aa468bc
 
 
ad9edf1
aa468bc
 
3b36cbf
aa468bc
 
 
 
 
 
 
 
 
 
 
86f4c82
aa468bc
 
 
 
 
 
 
 
 
 
 
 
 
 
3b36cbf
 
 
ad9edf1
aa468bc
 
3b36cbf
ad9edf1
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
3b36cbf
ad9edf1
 
 
 
 
aa468bc
3b36cbf
 
 
 
 
 
 
aa468bc
 
 
 
 
 
2d86dae
 
aa468bc
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
2d86dae
 
 
aa468bc
 
 
3b36cbf
 
aa468bc
 
 
 
 
3b36cbf
aa468bc
 
 
 
 
 
 
3b36cbf
aa468bc
 
3b36cbf
aa468bc
 
 
 
 
 
 
 
 
 
3b36cbf
aa468bc
 
 
3b36cbf
aa468bc
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
3b36cbf
aa468bc
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
3b36cbf
 
aa468bc
 
3b36cbf
aa468bc
 
3b36cbf
aa468bc
3b36cbf
aa468bc
3b36cbf
 
 
 
 
 
 
aa468bc
 
 
 
3b36cbf
 
aa468bc
 
3b36cbf
 
 
 
 
 
 
 
 
 
 
aa468bc
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
3b36cbf
 
aa468bc
 
 
3b36cbf
 
 
 
 
 
 
 
 
 
 
 
aa468bc
 
 
3b36cbf
 
 
 
 
 
 
aa468bc
 
 
 
 
 
2d86dae
 
aa468bc
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
2d86dae
 
 
aa468bc
 
 
3b36cbf
 
aa468bc
 
 
 
 
3b36cbf
aa468bc
 
 
 
 
 
3b36cbf
aa468bc
 
 
 
 
 
 
 
 
 
3b36cbf
aa468bc
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
3b36cbf
aa468bc
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
3b36cbf
 
aa468bc
 
3b36cbf
aa468bc
 
3b36cbf
aa468bc
3b36cbf
aa468bc
3b36cbf
 
 
 
 
 
 
aa468bc
 
 
 
3b36cbf
 
aa468bc
 
3b36cbf
 
 
 
 
 
 
 
 
 
 
aa468bc
 
 
 
 
 
 
 
 
 
 
 
 
 
 
3b36cbf
 
aa468bc
 
3b36cbf
 
 
 
 
 
 
 
 
 
 
 
aa468bc
 
3b36cbf
 
 
 
 
 
 
 
aa468bc
 
 
 
3b36cbf
 
aa468bc
 
 
 
3b36cbf
 
 
 
 
 
 
 
 
 
aa468bc
 
 
 
 
 
 
 
 
 
 
 
 
 
 
3b36cbf
 
aa468bc
3b36cbf
 
 
 
 
 
 
 
 
 
 
 
aa468bc
 
3b36cbf
 
 
 
 
 
 
 
aa468bc
 
 
 
42c3910
aa468bc
 
 
 
86f4c82
aa468bc
86f4c82
aa468bc
 
 
 
 
 
 
 
42c3910
 
 
 
 
 
892cb58
aa468bc
42c3910
 
aa468bc
 
 
 
892cb58
42c3910
 
 
 
aa468bc
 
42c3910
 
 
3b36cbf
892cb58
3b36cbf
 
 
42c3910
 
 
 
 
 
 
aa468bc
 
42c3910
 
aa468bc
42c3910
aa468bc
 
 
 
892cb58
42c3910
 
 
 
 
aa468bc
892cb58
aa468bc
42c3910
 
3b36cbf
892cb58
 
3b36cbf
 
42c3910
 
 
 
 
 
 
aa468bc
892cb58
aa468bc
42c3910
3b36cbf
 
892cb58
 
3b36cbf
 
42c3910
 
3b36cbf
 
aa468bc
892cb58
aa468bc
42c3910
3b36cbf
 
892cb58
 
3b36cbf
 
42c3910
 
3b36cbf
 
aa468bc
892cb58
aa468bc
42c3910
3b36cbf
892cb58
3b36cbf
 
 
 
 
42c3910
 
3b36cbf
 
 
aa468bc
892cb58
aa468bc
42c3910
3b36cbf
892cb58
3b36cbf
 
 
 
 
42c3910
 
3b36cbf
 
 
aa468bc
892cb58
aa468bc
42c3910
3b36cbf
892cb58
3b36cbf
 
 
 
 
42c3910
 
3b36cbf
 
 
aa468bc
892cb58
aa468bc
42c3910
3b36cbf
892cb58
3b36cbf
 
 
 
 
42c3910
 
3b36cbf
 
 
aa468bc
892cb58
aa468bc
42c3910
3b36cbf
892cb58
3b36cbf
 
 
 
 
42c3910
 
3b36cbf
 
 
42c3910
 
 
 
 
3b36cbf
 
 
aa468bc
 
 
42c3910
3b36cbf
 
 
aa468bc
 
 
 
 
 
 
86f4c82
 
aa468bc

"""

数据同步脚本 - Sync Space 专用版

从 AkShare 抓取数据并同步到 Hugging Face Dataset

"""

import os
import sys
import logging
import time
import threading
import gc
from datetime import datetime, timedelta
from concurrent.futures import ThreadPoolExecutor, as_completed
from typing import List, Optional, Dict, Any
from pathlib import Path

import pandas as pd
import akshare as ak
from huggingface_hub import hf_hub_download, upload_file, list_repo_files

# Tushare 适配器（优先使用）
from app.tushare_adapter import (
    get_stock_list_tushare,
    get_stock_daily_tushare,
    get_dividend_tushare,
    TUSHARE_AVAILABLE
)

# 混合数据适配器（yfinance + efinance，作为回退）
from app.hybrid_adapter import (
    get_stock_daily_yfinance, 
    get_index_daily_yfinance,
    get_fund_flow_efinance,
    YFINANCE_AVAILABLE
)

# 添加当前目录到路径
sys.path.insert(0, os.path.dirname(os.path.abspath(__file__)))

from app.database import get_db
from app.database_user import get_beijing_time
from app.sync_status import get_sync_status
from app.stock_list_cache import get_cached_stock_list, save_stock_list_cache

logging.basicConfig(
    level=logging.INFO,
    format='%(asctime)s - %(name)s - %(levelname)s - %(message)s'
)
logger = logging.getLogger(__name__)

# 配置
YEARS_OF_DATA = 10

def _safe_int_env(var_name: str, default: int) -> int:
    """安全地读取环境变量并转换为整数"""
    try:
        value = os.getenv(var_name)
        if value is None:
            return default
        return int(value)
    except (ValueError, TypeError):
        logger.warning(f"Invalid value for {var_name}, using default: {default}")
        return default

# 动态线程数配置（延迟计算，避免导入时触发 multiprocessing）
def get_thread_config():
    """获取线程池配置（延迟计算）"""
    import multiprocessing
    cpu_count = multiprocessing.cpu_count()
    
    # 分层并发策略（降低并发，避免触发服务端限流）
    config = {
        'daily': _safe_int_env("MAX_WORKERS_DAILY", min(4, cpu_count)),
        'fund': _safe_int_env("MAX_WORKERS_FUND", min(3, cpu_count)),
        'valuation': _safe_int_env("MAX_WORKERS_VALUATION", min(3, cpu_count)),
        'margin': _safe_int_env("MAX_WORKERS_MARGIN", min(3, cpu_count)),
        'financial': _safe_int_env("MAX_WORKERS_FINANCIAL", min(2, cpu_count)),
        'dividend': _safe_int_env("MAX_WORKERS_DIVIDEND", min(2, cpu_count)),
    }
    
    # 向后兼容
    legacy = _safe_int_env("MAX_WORKERS", 0)
    if legacy > 0:
        config = {k: legacy for k in config}
    
    return cpu_count, config

# 延迟初始化线程配置（在 main 中调用）
_CPU_COUNT = None
_THREAD_CONFIG = None
_thread_config_lock = threading.Lock()

def get_thread_config_safe():
    """安全获取线程配置（自动初始化，线程安全）"""
    global _CPU_COUNT, _THREAD_CONFIG
    if _THREAD_CONFIG is None:
        with _thread_config_lock:
            # 双重检查锁定模式
            if _THREAD_CONFIG is None:
                _CPU_COUNT, _THREAD_CONFIG = get_thread_config()
                logger.info(f"Thread pool config: CPU={_CPU_COUNT}, "
                            f"Daily={_THREAD_CONFIG['daily']}, Fund={_THREAD_CONFIG['fund']}, "
                            f"Valuation={_THREAD_CONFIG['valuation']}, Margin={_THREAD_CONFIG['margin']}, "
                            f"Financial={_THREAD_CONFIG['financial']}, Dividend={_THREAD_CONFIG['dividend']}")
    return _THREAD_CONFIG

def init_thread_config():
    """初始化线程配置（在 main 中调用）"""
    get_thread_config_safe()

def get_stock_list() -> pd.DataFrame:
    """获取全市场标的列表（带缓存机制）"""
    # 1. 尝试使用缓存
    cached_df = get_cached_stock_list()
    if cached_df is not None:
        return cached_df
    
    # 2. 缓存无效，重新获取
    logger.info("Fetching all-market target list...")
    all_lists = []
    
    # A股列表获取（优先使用Tushare，失败则回退到AkShare）
    max_retries = 5
    base_delay = 2.0
    
    # 尝试使用Tushare获取A股列表（更稳定，字段完整）
    if TUSHARE_AVAILABLE:
        try:
            df_a = get_stock_list_tushare()
            if df_a is not None and len(df_a) > 0:
                all_lists.append(df_a)
                logger.info(f"A-stock list fetched from Tushare: {len(df_a)} stocks")
            else:
                raise Exception("Tushare returned empty data")
        except Exception as e:
            logger.warning(f"Tushare get_stock_list failed: {e}, falling back to AkShare")
    
    # 如果Tushare失败或不可用，使用AkShare
    if not all_lists:
        for attempt in range(max_retries):
            try:
                # 使用 stock_info_a_code_name() 替代 stock_zh_a_spot_em()，更稳定
                df_a = ak.stock_info_a_code_name()
                df_a.columns = ['code', 'name']
                df_a['market'] = df_a['code'].apply(lambda x: '主板' if x.startswith(('60', '00')) else ('创业板' if x.startswith('30') else ('科创板' if x.startswith('68') else ('北交所' if x.startswith(('8', '4', '920')) else '其他'))))
                all_lists.append(df_a)
                logger.info(f"A-stock list fetched from AkShare: {len(df_a)} stocks")
                break  # 成功则退出重试循环
            except Exception as e:
                if attempt == max_retries - 1:
                    logger.error(f"Failed to fetch A-stock list after {max_retries} attempts: {e}")
                else:
                    delay = base_delay * (2 ** attempt)
                    logger.warning(f"Attempt {attempt + 1} failed, retrying in {delay}s... Error: {e}")
                    time.sleep(delay)

    # ETF (增加容错)
    try:
        df_etf = ak.fund_etf_spot_em()
        # 检查实际列名
        code_cols = ['代码', 'code', 'fund_code', 'ETF代码']
        name_cols = ['名称', 'name', 'fund_name', 'ETF名称']
        
        c_code = None
        c_name = None
        
        for col in code_cols:
            if col in df_etf.columns:
                c_code = col
                break
        
        for col in name_cols:
            if col in df_etf.columns:
                c_name = col
                break
        
        if c_code and c_name:
            df_etf = df_etf[[c_code, c_name]]
            df_etf.columns = ['code', 'name']
            df_etf['market'] = 'ETF'
            all_lists.append(df_etf)
            logger.info(f"ETF list fetched: {len(df_etf)} funds")
        else:
            logger.warning(f"Could not find code/name columns in ETF data. Available columns: {df_etf.columns.tolist()}")
    except Exception as e:
        logger.warning(f"ETF list fetch failed: {e}")

    # LOF
    try:
        df_lof = ak.fund_lof_spot_em()
        # 检查实际列名
        code_cols = ['代码', 'code', 'fund_code', 'LOF代码']
        name_cols = ['名称', 'name', 'fund_name', 'LOF名称']
        
        c_code = None
        c_name = None
        
        for col in code_cols:
            if col in df_lof.columns:
                c_code = col
                break
        
        for col in name_cols:
            if col in df_lof.columns:
                c_name = col
                break
        
        if c_code and c_name:
            df_lof = df_lof[[c_code, c_name]]
            df_lof.columns = ['code', 'name']
            df_lof['market'] = 'LOF'
            all_lists.append(df_lof)
            logger.info(f"LOF list fetched: {len(df_lof)} funds")
        else:
            logger.warning(f"Could not find code/name columns in LOF data. Available columns: {df_lof.columns.tolist()}")
    except Exception as e:
        logger.warning(f"LOF list fetch failed: {e}")

    # REITs
    try:
        df_reits = ak.reits_realtime_em()
        # 检查实际列名
        code_cols = ['代码', 'code', 'REITs代码']
        name_cols = ['名称', 'name', 'REITs名称']
        
        c_code = None
        c_name = None
        
        for col in code_cols:
            if col in df_reits.columns:
                c_code = col
                break
        
        for col in name_cols:
            if col in df_reits.columns:
                c_name = col
                break
        
        if c_code and c_name:
            df_reits = df_reits[[c_code, c_name]]
            df_reits.columns = ['code', 'name']
            df_reits['market'] = 'REITs'
            all_lists.append(df_reits)
            logger.info(f"REITs list fetched: {len(df_reits)} products")
        else:
            logger.warning(f"Could not find code/name columns in REITs data. Available columns: {df_reits.columns.tolist()}")
    except Exception as e:
        logger.warning(f"REITs list fetch failed: {e}")
    
    # 可转债
    try:
        df_cb = ak.bond_zh_hs_cov_spot()
        # 检查实际列名
        logger.info(f"Convertible bond columns: {df_cb.columns.tolist()}")
        
        # 尝试找到正确的代码列名
        code_cols = ['代码', 'symbol', 'bond_code', '代码', '转债代码', '代码']
        name_cols = ['名称', 'name', 'bond_name', '转债名称', '名称']
        
        c_code = None
        c_name = None
        
        for col in code_cols:
            if col in df_cb.columns:
                c_code = col
                break
        
        for col in name_cols:
            if col in df_cb.columns:
                c_name = col
                break
        
        if c_code and c_name:
            # 过滤掉未上市可转债（成交额=0 或 最新价=0）
            # 检查是否有成交额列
            amount_col = None
            for col in ['amount', '成交额', 'Amount']:
                if col in df_cb.columns:
                    amount_col = col
                    break
            
            if amount_col:
                before_count = len(df_cb)
                df_cb = df_cb[df_cb[amount_col] > 0]
                filtered_count = before_count - len(df_cb)
                if filtered_count > 0:
                    logger.info(f"Filtered {filtered_count} unlisted convertible bonds (amount=0)")
            
            df_cb = df_cb[[c_code, c_name]]
            df_cb.columns = ['code', 'name']
            df_cb['market'] = '可转债'
            all_lists.append(df_cb)
            logger.info(f"Convertible bond list fetched: {len(df_cb)} bonds")
        else:
            logger.warning(f"Could not find code/name columns in convertible bond data. Available columns: {df_cb.columns.tolist()}")
    except Exception as e:
        logger.warning(f"Convertible bond list fetch failed: {e}")

    if not all_lists:
        db = get_db()
        return db.conn.execute("SELECT code, name, market FROM stock_list").df()
        
    df = pd.concat(all_lists).drop_duplicates(subset=['code'])
    df['list_date'] = None
    
    # 3. 保存到缓存
    save_stock_list_cache(df)
    
    return df

def get_target_daily(code: str, start_date: str, market: str) -> Optional[pd.DataFrame]:
    """抓取单只标的数据"""
    max_retries = 5
    base_delay = 1.0  # 寂础延迟秒
    
    for attempt in range(max_retries):
        try:
            # 指数退避：每次重试增加延迟
            if attempt > 0:
                delay = base_delay * (2 ** attempt)  # 1s, 2s, 4s, 8s
                time.sleep(delay)
            end_date = get_beijing_time().strftime('%Y%m%d')
            fetch_start = start_date.replace('-', '')
            df = None
            if market == 'INDEX':
                # 指数：优先使用 yfinance，失败则回退 AkShare
                if YFINANCE_AVAILABLE:
                    df = get_index_daily_yfinance(code, fetch_start, end_date)
                    if df is not None:
                        logger.debug(f"Got index data from yfinance for {code}")
                if df is None:
                    df = ak.stock_zh_index_daily_em(symbol=f"sh{code}" if code.startswith('000') else f"sz{code}")
            elif market == 'ETF':
                df = ak.fund_etf_hist_em(symbol=code, period="daily", start_date=fetch_start, end_date=end_date, adjust="hfq")
            elif market == 'LOF':
                df = ak.fund_lof_hist_em(symbol=code, period="daily", start_date=fetch_start, end_date=end_date, adjust="hfq")
            elif market == '可转债':
                # bond_zh_hs_cov_daily 需要带交易所前缀的代码（如 sh110048, sz123015）
                # 北交所可转债（bj开头）不支持
                if code.startswith('bj'):
                    return None  # 北交所可转债 API 不支持
                df = ak.bond_zh_hs_cov_daily(symbol=code)
            elif market == 'REITs':
                df = ak.reits_hist_em(symbol=code)
            else:
                # A股个股：优先使用 Tushare，失败则回退 yfinance/AkShare
                if TUSHARE_AVAILABLE:
                    df = get_stock_daily_tushare(code, fetch_start, end_date, adj='qfq')
                    if df is not None:
                        logger.debug(f"Got data from Tushare for {code}")
                if df is None and YFINANCE_AVAILABLE:
                    df = get_stock_daily_yfinance(code, fetch_start, end_date)
                    if df is not None:
                        logger.debug(f"Got data from yfinance for {code}")
                if df is None:
                    logger.debug(f"Tushare/yfinance failed, falling back to AkShare for {code}")
                    df = ak.stock_zh_a_hist(symbol=code, period="daily", start_date=fetch_start, end_date=end_date, adjust="hfq")
            
            if df is not None and not df.empty:
                # 标准化列名
                rename_map = {
                    '日期': 'trade_date', 'date': 'trade_date', 'Date': 'trade_date',
                    '开盘': 'open', '今开': 'open', 'Open': 'open',
                    '最高': 'high', 'High': 'high',
                    '最低': 'low', 'Low': 'low',
                    '收盘': 'close', '最新价': 'close', 'Close': 'close',
                    '成交量': 'volume', 'Volume': 'volume',
                    '成交额': 'amount', 'Amount': 'amount',
                    '涨跌幅': 'pct_chg',
                    '换手率': 'turnover_rate', '换手': 'turnover_rate'
                }
                df = df.rename(columns=rename_map)
                
                if 'trade_date' not in df.columns:
                    df = df.reset_index().rename(columns={'index': 'trade_date', 'date': 'trade_date'})
                
                df['trade_date'] = pd.to_datetime(df['trade_date'])
                df = df[df['trade_date'] >= pd.to_datetime(start_date)]
                if 'amount' not in df.columns: df['amount'] = 0
                if 'pct_chg' not in df.columns: df['pct_chg'] = df['close'].pct_change() * 100
                if 'turnover_rate' not in df.columns: df['turnover_rate'] = 0
                df['code'] = code
                return df[['code', 'trade_date', 'open', 'high', 'low', 'close', 'volume', 'amount', 'pct_chg', 'turnover_rate']]
        except Exception as e:
            if attempt == max_retries - 1:
                logger.warning(f"Failed to fetch {code} ({market}): {str(e)}")
            time.sleep(1)
    return None

def check_today_data_available() -> bool:
    """

    探测当日数据是否已更新可用

    通过尝试获取一只活跃股票（平安银行 000001）的当日数据来判断

    

    Returns:

        True: 当日数据已可用

        False: 当日数据尚未更新

    """
    try:
        today = get_beijing_time().strftime('%Y%m%d')
        
        # 优先使用 Tushare 探测
        if TUSHARE_AVAILABLE:
            try:
                import tushare as ts
                ts.set_token(TUSHARE_TOKENS[0])
                pro = ts.pro_api()
                df = pro.daily(ts_code='000001.SZ', trade_date=today)
                if df is not None and not df.empty:
                    logger.debug(f"Tushare check: today data available")
                    return True
            except Exception:
                pass
        
        # 回退到 AkShare 探测
        try:
            df = ak.stock_zh_a_spot_em()
            if df is not None and not df.empty:
                # 检查是否有今日数据
                logger.debug(f"AkShare check: today data available")
                return True
        except Exception:
            pass
            
    except Exception as e:
        logger.debug(f"Check today data failed: {e}")
    
    return False


def get_last_trading_day() -> str:
    """获取最近一个交易日（通过探测数据可用性）

    

    逻辑：

    1. 先尝试获取当日数据（探测 000001.SZ）

    2. 如果能获取到，说明当日数据已更新，返回今日

    3. 如果获取不到，说明当日数据未更新，返回上一个交易日

    """
    now = get_beijing_time()
    today = now.date()
    today_str = today.strftime('%Y-%m-%d')
    
    # 探测当日数据是否可用
    logger.info(f"Checking if today ({today_str}) data is available...")
    
    if check_today_data_available():
        logger.info(f"Today data is available, using today: {today_str}")
        return today_str
    else:
        # 获取上一个交易日
        try:
            # 使用交易日历获取上一个交易日
            df = ak.tool_trade_date_hist_sina()
            if df is not None and not df.empty:
                df['trade_date'] = pd.to_datetime(df['trade_date']).dt.date
                prev_trading_days = df[df['trade_date'] < today]['trade_date']
                if not prev_trading_days.empty:
                    last_day = prev_trading_days.iloc[-1]
                    logger.info(f"Today data not available yet, using previous trading day: {last_day}")
                    return last_day.strftime('%Y-%m-%d')
        except Exception as e:
            logger.warning(f"Failed to get previous trading day: {e}")
        
        # 回退：昨天
        yesterday = (today - timedelta(days=1)).strftime('%Y-%m-%d')
        logger.info(f"Using yesterday as fallback: {yesterday}")
        return yesterday

    # 备用：使用指数行情获取
    try:
        df = ak.stock_zh_index_daily_em(symbol="sh000300")
        if df is not None and not df.empty:
            date_col = 'date' if 'date' in df.columns else ('日期' if '日期' in df.columns else None)
            if date_col:
                last_date = pd.to_datetime(df[date_col].iloc[-1]).date()
                # 同样检查时间条件
                if last_date == today and current_hour < 20:
                    # 返回前一天的数据
                    return pd.to_datetime(df[date_col].iloc[-2]).strftime('%Y-%m-%d')
                return last_date.strftime('%Y-%m-%d')
    except Exception: pass

    # 最终回退：按工作日估算
    d = get_beijing_time()
    # 如果当前时间 < 20:00，从昨天开始查找
    if current_hour < 20:
        d -= timedelta(days=1)
    while d.weekday() >= 5:
        d -= timedelta(days=1)
    return d.strftime('%Y-%m-%d')


def get_index_daily(code: str) -> Optional[pd.DataFrame]:
    """抓取指数日线"""
    try:
        symbol = f"sh{code}" if code.startswith('000') else f"sz{code}"
        df = ak.stock_zh_index_daily_em(symbol=symbol)
        if df is None or df.empty:
            return None

        rename_map = {
            'date': 'trade_date', '日期': 'trade_date',
            'open': 'open', '开盘': 'open',
            'high': 'high', '最高': 'high',
            'low': 'low', '最低': 'low',
            'close': 'close', '收盘': 'close',
            'volume': 'volume', '成交量': 'volume',
            'amount': 'amount', '成交额': 'amount',
            'pct_chg': 'pct_chg', '涨跌幅': 'pct_chg'
        }
        df = df.rename(columns=rename_map)
        if 'trade_date' not in df.columns:
            return None

        df['trade_date'] = pd.to_datetime(df['trade_date'])
        if 'amount' not in df.columns:
            df['amount'] = 0
        if 'pct_chg' not in df.columns:
            df['pct_chg'] = df['close'].pct_change() * 100
        if 'volume' not in df.columns:
            df['volume'] = 0
        df['turnover_rate'] = 0
        df['code'] = code

        return df[['code', 'trade_date', 'open', 'high', 'low', 'close', 'volume', 'amount', 'pct_chg', 'turnover_rate']]
    except Exception as e:
        logger.warning(f"Failed to fetch index {code}: {e}")
        return None


def sync_stock_daily(targets: List[Dict[str, str]], last_trade_day: str) -> Dict[str, Any]:
    """增量同步逻辑，返回详细结果（采用全局水位线机制）"""
    logger.info("Syncing daily data...")
    
    # 1. 扫描本地 parquet 文件获取全局最新日期（类似其他指标）
    parquet_dir = Path("/tmp/data/parquet")
    parquet_dir.mkdir(parents=True, exist_ok=True)
    
    global_latest_date = "2000-01-01"
    existing_codes = set()
    
    for f in parquet_dir.glob("*.parquet"):
        if f.name.startswith('index_'):  # 跳过指数文件
            continue
        try:
            df = pd.read_parquet(f)
            if not df.empty and 'trade_date' in df.columns:
                max_date = df['trade_date'].max()
                if isinstance(max_date, pd.Timestamp):
                    max_date = max_date.strftime('%Y-%m-%d')
                if max_date > global_latest_date:
                    global_latest_date = max_date
                existing_codes.update(df['code'].unique())
        except Exception:
            pass
    
    # 2. 如果本地没有数据，尝试从云端下载最近3个月作为基准
    if global_latest_date == "2000-01-01":
        repo_id = os.getenv("DATASET_REPO_ID")
        if repo_id:
            try:
                files = list_repo_files(repo_id=repo_id, repo_type="dataset")
                parquet_files = sorted([f for f in files if f.startswith("data/parquet/") and f.endswith(".parquet")])
                
                # 下载最近3个月的数据作为基准
                for pf in parquet_files[-3:]:
                    try:
                        local_file = hf_hub_download(repo_id=repo_id, filename=pf, repo_type="dataset")
                        df = pd.read_parquet(local_file)
                        if not df.empty and 'trade_date' in df.columns:
                            max_date = df['trade_date'].max()
                            if isinstance(max_date, pd.Timestamp):
                                max_date = max_date.strftime('%Y-%m-%d')
                            if max_date > global_latest_date:
                                global_latest_date = max_date
                            existing_codes.update(df['code'].unique())
                    except Exception:
                        pass
                logger.info(f"Downloaded daily data from cloud, latest date: {global_latest_date}")
            except Exception as e:
                logger.info(f"No existing daily data in cloud: {e}")
    
    # 3. 区分新股和存量股票
    new_codes = [t for t in targets if t['code'] not in existing_codes]
    
    # 4. 全局水位线拦截
    if global_latest_date >= last_trade_day and not new_codes:
        logger.info(f"Daily data is already up to date ({global_latest_date}) and no new stocks. Skip.")
        return {'count': 0, 'failed_codes': [], 'status': 'skipped', 'message': f'Already up to date ({global_latest_date})'}
    
    # 5. 确定同步策略
    if global_latest_date >= last_trade_day:
        logger.info(f"Global date is up to date, but found {len(new_codes)} new stocks. Syncing new stocks only.")
        sync_targets = new_codes
        # 新股只获取最近1年数据（而非10年）
        start_dt = (get_beijing_time() - timedelta(days=365)).strftime('%Y-%m-%d')
    else:
        logger.info(f"Syncing daily data from {global_latest_date} to {last_trade_day}...")
        sync_targets = targets
        start_dt = (pd.to_datetime(global_latest_date) + timedelta(days=1)).strftime('%Y-%m-%d')
    
    # 设置每只股票的start_dt
    pending = []
    for t in sync_targets:
        t['start_dt'] = start_dt
        pending.append(t)

    # 应用 SYNC_LIMIT 限制
    sync_limit = int(os.getenv("SYNC_LIMIT", -1))
    if sync_limit > 0 and len(pending) > sync_limit:
        logger.info(f"Limiting sync to first {sync_limit} targets (out of {len(pending)})")
        pending = pending[:sync_limit]

    if not pending:
        return {'count': 0, 'failed_codes': [], 'status': 'skipped', 'message': 'No pending targets'}
    
    logger.info(f"Syncing {len(pending)} targets...")

    all_new_data = []
    failed_codes = []
    success_codes = []
    
    with ThreadPoolExecutor(max_workers=get_thread_config_safe()['daily']) as executor:
        futures = {executor.submit(get_target_daily, t['code'], t['start_dt'], t['market']): t['code'] for t in pending}
        for i, future in enumerate(as_completed(futures), 1):
            code = futures[future]
            res = future.result()
            if res is not None:
                all_new_data.append(res)
                success_codes.append(code)
            else:
                failed_codes.append(code)
            if i % 500 == 0: logger.info(f"Progress: {i}/{len(pending)}")

    changed_files = []  # 记录变更的文件名
    
    if all_new_data:
        inc_df = pd.concat(all_new_data, ignore_index=True)
        # 方案3：及时释放内存
        del all_new_data
        total_records = len(inc_df)
        # 识别变动月份
        changed = inc_df.assign(yr=inc_df['trade_date'].dt.year, mo=inc_df['trade_date'].dt.month)[['yr', 'mo']].drop_duplicates().values
        
        for yr, mo in changed:
            yr, mo = int(yr), int(mo)
            filename = f"{yr}-{mo:02d}.parquet"
            local_path = Path(f"/tmp/data/parquet/{filename}")  # Sync Space 使用 /tmp
            local_path.parent.mkdir(parents=True, exist_ok=True)
            
            # 增量核心：先检查本地是否有，没有再从云端拉取
            old_df = None
            if local_path.exists():
                try:
                    old_df = pd.read_parquet(local_path)
                    logger.info(f"Using local cache for {filename}")
                except Exception: pass
            
            if old_df is None:
                repo_id = os.getenv("DATASET_REPO_ID")
                if repo_id:
                    try:
                        old_file = hf_hub_download(repo_id=repo_id, filename=f"data/parquet/{filename}", repo_type="dataset")
                        old_df = pd.read_parquet(old_file)
                        logger.info(f"Downloaded {filename} from cloud")
                    except Exception:
                        pass

            # 合并新数据
            month_inc = inc_df[(inc_df['trade_date'].dt.year == yr) & (inc_df['trade_date'].dt.month == mo)]
            if old_df is not None:
                final_month_df = pd.concat([old_df, month_inc]).drop_duplicates(subset=['code', 'trade_date'])
                # 方案3：释放旧数据内存
                del old_df, month_inc
            else:
                final_month_df = month_inc
                
            final_month_df.to_parquet(local_path)
            changed_files.append(filename)  # 记录变更的文件
            logger.info(f"Saved updated data for {filename}")
            # 方案3：释放最终数据内存
            del final_month_df
        
        # 方案3：循环结束后释放inc_df并触发GC
        del inc_df
        gc.collect()
    else:
        total_records = 0
            
    return {
        'count': len(success_codes),
        'failed_codes': failed_codes,
        'status': 'success' if not failed_codes else 'partial_fail',
        'record_count': total_records,
        'success_rate': len(success_codes) / len(pending) if pending else 0,
        'changed_files': changed_files  # 返回变更文件列表
    }


# ==================== 新增：资金流向数据同步 ====================

def get_stock_fund_flow(code: str, market: str) -> Optional[pd.DataFrame]:
    """获取单只股票资金流向数据（优先使用 efinance，失败则回退 AkShare）"""
    
    # 标准化列名映射
    standard_cols = ['code', 'trade_date', 'close', 'pct_chg',
                     'main_net_inflow', 'main_net_inflow_pct',
                     'huge_net_inflow', 'huge_net_inflow_pct',
                     'large_net_inflow', 'large_net_inflow_pct',
                     'medium_net_inflow', 'medium_net_inflow_pct',
                     'small_net_inflow', 'small_net_inflow_pct']
    
    # 1. 优先尝试 efinance
    try:
        df = get_fund_flow_efinance(code, '20000101', '20991231')
        if df is not None and not df.empty:
            # efinance 字段映射
            rename_map = {
                '日期': 'trade_date', '收盘价': 'close', '涨跌幅': 'pct_chg',
                '主力净流入': 'main_net_inflow',
                '主力净流入占比': 'main_net_inflow_pct',
                '超大单净流入': 'huge_net_inflow',
                '超大单流入净占比': 'huge_net_inflow_pct',
                '大单净流入': 'large_net_inflow',
                '大单流入净占比': 'large_net_inflow_pct',
                '中单净流入': 'medium_net_inflow',
                '中单流入净占比': 'medium_net_inflow_pct',
                '小单净流入': 'small_net_inflow',
                '小单流入净占比': 'small_net_inflow_pct',
            }
            df = df.rename(columns=rename_map)
            if 'trade_date' in df.columns:
                df['trade_date'] = pd.to_datetime(df['trade_date'])
            df['code'] = code
            
            result_cols = [c for c in standard_cols if c in df.columns]
            logger.debug(f"Got fund flow from efinance for {code}")
            return df[result_cols]
    except Exception as e:
        logger.debug(f"efinance fund flow failed for {code}: {e}")
    
    # 2. 回退到 AkShare
    max_retries = 3
    base_delay = 1.0
    for attempt in range(max_retries):
        try:
            # 确定 market 参数
            if market == '北交所' or code.startswith(('8', '4', '920')):
                mk = 'bj'
            elif code.startswith(('6', '9')):
                mk = 'sh'
            else:
                mk = 'sz'
            
            df = ak.stock_individual_fund_flow(stock=code, market=mk)
            if df is not None and not df.empty:
                # AkShare 字段映射
                rename_map = {
                    '日期': 'trade_date', '收盘价': 'close', '涨跌幅': 'pct_chg',
                    '主力净流入-净额': 'main_net_inflow',
                    '主力净流入-净占比': 'main_net_inflow_pct',
                    '超大单净流入-净额': 'huge_net_inflow',
                    '超大单净流入-净占比': 'huge_net_inflow_pct',
                    '大单净流入-净额': 'large_net_inflow',
                    '大单净流入-净占比': 'large_net_inflow_pct',
                    '中单净流入-净额': 'medium_net_inflow',
                    '中单净流入-净占比': 'medium_net_inflow_pct',
                    '小单净流入-净额': 'small_net_inflow',
                    '小单净流入-净占比': 'small_net_inflow_pct',
                }
                df = df.rename(columns=rename_map)
                df['trade_date'] = pd.to_datetime(df['trade_date'])
                df['code'] = code
                
                result_cols = [c for c in standard_cols if c in df.columns]
                logger.debug(f"Got fund flow from AkShare for {code}")
                return df[result_cols]
        except Exception as e:
            if attempt > 0:
                delay = base_delay * (2 ** attempt)
                time.sleep(delay)
    
    return None


def sync_fund_flow(targets: List[Dict[str, str]], last_trade_day: str) -> Dict[str, Any]:
    """同步资金流向数据（按月分表版），返回详细结果"""
    logger.info("Syncing fund flow data...")
    
    # 1. 从所有本地 parquet 文件计算全局最新日期
    flow_dir = Path("/tmp/data/fund_flow")
    flow_dir.mkdir(parents=True, exist_ok=True)
    
    global_latest_date = "2000-01-01"
    existing_codes = set()
    
    # 扫描本地已有的月度文件
    for f in flow_dir.glob("*.parquet"):
        try:
            df = pd.read_parquet(f)
            if not df.empty:
                max_date = df['trade_date'].max().strftime('%Y-%m-%d')
                if max_date > global_latest_date:
                    global_latest_date = max_date
                existing_codes.update(df['code'].unique())
        except Exception:
            pass
    
    # 如果本地没有数据，尝试从云端下载
    if global_latest_date == "2000-01-01":
        repo_id = os.getenv("DATASET_REPO_ID")
        if repo_id:
            try:
                # 获取云端文件列表
                files = list_repo_files(repo_id=repo_id, repo_type="dataset")
                flow_files = sorted([f for f in files if f.startswith("data/fund_flow/") and f.endswith(".parquet")])

                for ff in flow_files[-3:]:  # 先下载最近3个月的数据作为基准
                    try:
                        local_file = hf_hub_download(repo_id=repo_id, filename=ff, repo_type="dataset")
                        df = pd.read_parquet(local_file)
                        if not df.empty:
                            # 提取月份
                            filename = Path(ff).stem  # 如 "2026-02"
                            local_path = flow_dir / f"{filename}.parquet"
                            df.to_parquet(local_path)
                            max_date = df['trade_date'].max().strftime('%Y-%m-%d')
                            if max_date > global_latest_date:
                                global_latest_date = max_date
                            existing_codes.update(df['code'].unique())
                    except Exception:
                        pass
                logger.info(f"Downloaded fund flow from cloud, latest date: {global_latest_date}")
            except Exception as e:
                logger.info(f"No existing fund flow data in cloud: {e}")

    # 2. 过滤目标（排除 ETF/LOF/REITs/可转债）
    stock_targets = [t for t in targets if t['market'] not in ['ETF', 'LOF', 'REITs', '可转债']]
    new_codes = [t for t in stock_targets if t['code'] not in existing_codes]
    
    # 3. 全局水位线拦截
    if global_latest_date >= last_trade_day and not new_codes:
        logger.info(f"Fund flow data is already up to date ({global_latest_date}) and no new stocks. Skip.")
        return {'count': 0, 'failed_codes': [], 'status': 'skipped', 'message': f'Already up to date ({global_latest_date})'}

    # 4. 增量获取
    if global_latest_date >= last_trade_day:
        logger.info(f"Global date is up to date, but found {len(new_codes)} new stocks. Syncing new stocks only.")
        sync_targets = new_codes
    else:
        logger.info(f"Syncing fund flow from {global_latest_date} to {last_trade_day}...")
        sync_targets = stock_targets

    all_data = []
    success_codes = []
    failed_codes = []
    
    with ThreadPoolExecutor(max_workers=get_thread_config_safe()['fund']) as executor:
        futures = {executor.submit(get_stock_fund_flow, t['code'], t['market']): t['code'] for t in sync_targets}
        for i, future in enumerate(as_completed(futures), 1):
            code = futures[future]
            res = future.result()
            if res is not None and not res.empty:
                if code in existing_codes:
                    res = res[res['trade_date'] > pd.to_datetime(global_latest_date)]
                if not res.empty:
                    all_data.append(res)
                    success_codes.append(code)
            else:
                failed_codes.append(code)
            if i % 500 == 0:
                logger.info(f"Fund flow progress: {i}/{len(sync_targets)}, success: {len(success_codes)}")
    
    total_records = 0
    changed_files = []  # 记录变更的文件名
    
    # 5. 按月分表保存
    if all_data:
        new_df = pd.concat(all_data, ignore_index=True)
        # 方案3：及时释放内存
        del all_data
        total_records = len(new_df)
        
        # 确定需要更新的月份
        if not new_df.empty:
            min_date = new_df['trade_date'].min()
            max_date = new_df['trade_date'].max()
            
            current = min_date.to_period('M')
            end_period = max_date.to_period('M')
            
            while current <= end_period:
                yr, mo = current.year, current.month
                month_data = new_df[(new_df['trade_date'].dt.year == yr) & (new_df['trade_date'].dt.month == mo)]
                
                if not month_data.empty:
                    filename = f"{yr}-{mo:02d}.parquet"
                    local_path = flow_dir / filename
                    
                    # 合并已有数据
                    old_month_df = None
                    if local_path.exists():
                        try:
                            old_month_df = pd.read_parquet(local_path)
                        except Exception:
                            pass
                    
                    if old_month_df is not None:
                        final_month_df = pd.concat([old_month_df, month_data]).drop_duplicates(subset=['code', 'trade_date'])
                        # 方案3：释放旧数据内存
                        del old_month_df, month_data
                    else:
                        final_month_df = month_data
                    
                    final_month_df.to_parquet(local_path)
                    changed_files.append(filename)  # 记录变更的文件
                    logger.info(f"Saved fund flow data for {filename}")
                    # 方案3：释放最终数据内存
                    del final_month_df
                
                current += 1
        
        logger.info(f"Fund flow updated: {len(new_df)} new records")
        # 方案3：释放new_df并触发GC
        del new_df
        gc.collect()
    
    return {
        'count': len(success_codes),
        'failed_codes': failed_codes,
        'status': 'success' if not failed_codes else 'partial_fail',
        'record_count': total_records,
        'success_rate': len(success_codes) / len(sync_targets) if sync_targets else 0,
        'changed_files': changed_files  # 返回变更文件列表
    }


# ==================== 新增：估值指标数据同步 ====================

def get_stock_valuation(code: str) -> Optional[pd.DataFrame]:
    """获取单只股票估值指标数据"""
    max_retries = 5
    base_delay = 1.0
    for attempt in range(max_retries):
        try:
            df = ak.stock_a_lg_indicator(symbol=code)
            if df is not None and not df.empty:
                # 标准化列名
                rename_map = {
                    '日期': 'trade_date',
                    '市盈率': 'pe_ttm',
                    '市盈率TTM': 'pe_ttm',
                    '静态市盈率': 'pe_static',
                    '市净率': 'pb',
                    '市销率': 'ps_ttm',
                    '股息率': 'dv_ratio',
                    '总市值': 'total_mv',
                    '流通市值': 'circ_mv',
                }
                df = df.rename(columns=rename_map)
                df['trade_date'] = pd.to_datetime(df['trade_date'])
                df['code'] = code
                
                cols = ['code', 'trade_date', 'pe_ttm', 'pe_static', 'pb', 
                        'ps_ttm', 'dv_ratio', 'total_mv', 'circ_mv']
                available_cols = [c for c in cols if c in df.columns]
                return df[available_cols]
        except Exception:
            if attempt > 0:
                delay = base_delay * (2 ** attempt)
                time.sleep(delay)
    return None


def sync_valuation(targets: List[Dict[str, str]], last_trade_day: str) -> Dict[str, Any]:
    """同步估值指标数据（按月分表版）"""
    logger.info("Syncing valuation data...")
    
    # 1. 从所有本地 parquet 文件计算全局最新日期
    val_dir = Path("/tmp/data/valuation")
    val_dir.mkdir(parents=True, exist_ok=True)
    
    global_latest_date = "2000-01-01"
    existing_codes = set()
    
    # 扫描本地已有的月度文件
    for f in val_dir.glob("*.parquet"):
        try:
            df = pd.read_parquet(f)
            if not df.empty:
                max_date = df['trade_date'].max().strftime('%Y-%m-%d')
                if max_date > global_latest_date:
                    global_latest_date = max_date
                existing_codes.update(df['code'].unique())
        except Exception:
            pass
    
    # 如果本地没有数据，尝试从云端下载
    if global_latest_date == "2000-01-01":
        repo_id = os.getenv("DATASET_REPO_ID")
        if repo_id:
            try:
                files = list_repo_files(repo_id=repo_id, repo_type="dataset")
                val_files = sorted([f for f in files if f.startswith("data/valuation/") and f.endswith(".parquet")])

                for vf in val_files[-3:]:  # 先下载最近3个月
                    try:
                        local_file = hf_hub_download(repo_id=repo_id, filename=vf, repo_type="dataset")
                        df = pd.read_parquet(local_file)
                        if not df.empty:
                            filename = Path(vf).stem
                            local_path = val_dir / f"{filename}.parquet"
                            df.to_parquet(local_path)
                            max_date = df['trade_date'].max().strftime('%Y-%m-%d')
                            if max_date > global_latest_date:
                                global_latest_date = max_date
                            existing_codes.update(df['code'].unique())
                    except Exception:
                        pass
                logger.info(f"Downloaded valuation from cloud, latest date: {global_latest_date}")
            except Exception as e:
                logger.info(f"No existing valuation data in cloud: {e}")

    # 2. 过滤目标（排除 ETF/LOF/REITs/可转债）
    stock_targets = [t for t in targets if t['market'] not in ['ETF', 'LOF', 'REITs', '可转债']]
    new_codes = [t for t in stock_targets if t['code'] not in existing_codes]

    # 3. 全局水位线拦截
    if global_latest_date >= last_trade_day and not new_codes:
        logger.info(f"Valuation data is already up to date ({global_latest_date}) and no new stocks. Skip.")
        return {'count': 0, 'failed_codes': [], 'status': 'skipped', 'record_count': 0, 'changed_files': [], 'message': f'Already up to date ({global_latest_date})'}

    # 4. 增量获取
    if global_latest_date >= last_trade_day:
        logger.info(f"Global date is up to date, but found {len(new_codes)} new stocks. Syncing new stocks only.")
        sync_targets = new_codes
    else:
        logger.info(f"Syncing valuation from {global_latest_date} to {last_trade_day}...")
        sync_targets = stock_targets

    all_data = []
    success_count = 0
    
    with ThreadPoolExecutor(max_workers=get_thread_config_safe()['valuation']) as executor:
        futures = {executor.submit(get_stock_valuation, t['code']): t['code'] for t in sync_targets}
        for i, future in enumerate(as_completed(futures), 1):
            res = future.result()
            if res is not None and not res.empty:
                code = futures[future]
                if code in existing_codes:
                    res = res[res['trade_date'] > pd.to_datetime(global_latest_date)]
                if not res.empty:
                    all_data.append(res)
                    success_count += 1
            if i % 500 == 0:
                logger.info(f"Valuation progress: {i}/{len(sync_targets)}, success: {success_count}")
    
    total_records = 0
    changed_files = []  # 记录变更的文件名
    
    # 5. 按月分表保存
    if all_data:
        new_df = pd.concat(all_data, ignore_index=True)
        # 方案3：及时释放内存
        del all_data
        total_records = len(new_df)
        
        if not new_df.empty:
            min_date = new_df['trade_date'].min()
            max_date = new_df['trade_date'].max()
            
            current = min_date.to_period('M')
            end_period = max_date.to_period('M')
            
            while current <= end_period:
                yr, mo = current.year, current.month
                month_data = new_df[(new_df['trade_date'].dt.year == yr) & (new_df['trade_date'].dt.month == mo)]
                
                if not month_data.empty:
                    filename = f"{yr}-{mo:02d}.parquet"
                    local_path = val_dir / filename
                    
                    old_month_df = None
                    if local_path.exists():
                        try:
                            old_month_df = pd.read_parquet(local_path)
                        except Exception:
                            pass
                    
                    if old_month_df is not None:
                        final_month_df = pd.concat([old_month_df, month_data]).drop_duplicates(subset=['code', 'trade_date'])
                        # 方案3：释放旧数据内存
                        del old_month_df, month_data
                    else:
                        final_month_df = month_data
                    
                    final_month_df.to_parquet(local_path)
                    changed_files.append(filename)  # 记录变更的文件
                    logger.info(f"Saved valuation data for {filename}")
                    # 方案3：释放最终数据内存
                    del final_month_df
                
                current += 1
        
        logger.info(f"Valuation updated: {len(new_df)} new records")
        # 方案3：释放new_df并触发GC
        del new_df
        gc.collect()
    
    return {
        'count': success_count,
        'failed_codes': [],
        'status': 'success',
        'record_count': total_records,
        'changed_files': changed_files  # 返回变更文件列表
    }


# ==================== 新增：融资融券数据同步 ====================

def get_stock_margin(code: str) -> Optional[pd.DataFrame]:
    """获取单只股票融资融券数据"""
    max_retries = 5
    base_delay = 1.0
    for attempt in range(max_retries):
        try:
            # 尝试上交所
            if code.startswith('6'):
                df = ak.stock_margin_detail_sh(symbol=code)
            else:
                df = ak.stock_margin_detail_sz(symbol=code)
            
            if df is not None and not df.empty:
                # 标准化列名
                rename_map = {
                    '日期': 'trade_date',
                    '融资余额': 'rzye',
                    '融资买入额': 'rzmre',
                    '融资偿还额': 'rzche',
                    '融券余额': 'rqye',
                    '融券卖出量': 'rqmcl',
                    '融资融券余额': 'rzrqye',
                }
                df = df.rename(columns=rename_map)
                if 'trade_date' in df.columns:
                    df['trade_date'] = pd.to_datetime(df['trade_date'])
                df['code'] = code
                
                cols = ['code', 'trade_date', 'rzye', 'rzmre', 'rzche', 'rqye', 'rqmcl', 'rzrqye']
                available_cols = [c for c in cols if c in df.columns]
                return df[available_cols]
        except Exception as e:
            if attempt == max_retries - 1:
                pass
            if attempt > 0:
                delay = base_delay * (2 ** attempt)
                time.sleep(delay)
    return None


def sync_margin(targets: List[Dict[str, str]], last_trade_day: str) -> Dict[str, Any]:
    """同步融资融券数据（按月分表版）"""
    logger.info("Syncing margin trading data...")
    
    # 1. 从所有本地 parquet 文件计算全局最新日期
    margin_dir = Path("/tmp/data/margin")
    margin_dir.mkdir(parents=True, exist_ok=True)
    
    global_latest_date = "2000-01-01"
    existing_codes = set()
    
    # 扫描本地已有的月度文件
    for f in margin_dir.glob("*.parquet"):
        try:
            df = pd.read_parquet(f)
            if not df.empty:
                max_date = df['trade_date'].max().strftime('%Y-%m-%d')
                if max_date > global_latest_date:
                    global_latest_date = max_date
                existing_codes.update(df['code'].unique())
        except Exception:
            pass
    
    # 如果本地没有数据，尝试从云端下载
    if global_latest_date == "2000-01-01":
        repo_id = os.getenv("DATASET_REPO_ID")
        if repo_id:
            try:
                files = list_repo_files(repo_id=repo_id, repo_type="dataset")
                margin_files = sorted([f for f in files if f.startswith("data/margin/") and f.endswith(".parquet")])

                for mf in margin_files[-3:]:  # 先下载最近3个月
                    try:
                        local_file = hf_hub_download(repo_id=repo_id, filename=mf, repo_type="dataset")
                        df = pd.read_parquet(local_file)
                        if not df.empty:
                            filename = Path(mf).stem
                            local_path = margin_dir / f"{filename}.parquet"
                            df.to_parquet(local_path)
                            max_date = df['trade_date'].max().strftime('%Y-%m-%d')
                            if max_date > global_latest_date:
                                global_latest_date = max_date
                            existing_codes.update(df['code'].unique())
                    except Exception:
                        pass
                logger.info(f"Downloaded margin from cloud, latest date: {global_latest_date}")
            except Exception as e:
                logger.info(f"No existing margin data in cloud: {e}")

    # 2. 过滤目标（只保留主板、创业板、科创板）
    stock_targets = [t for t in targets if t['market'] in ['主板', '创业板', '科创板']]
    new_codes = [t for t in stock_targets if t['code'] not in existing_codes]

    # 3. 全局水位线拦截
    if global_latest_date >= last_trade_day and not new_codes:
        logger.info(f"Margin data is already up to date ({global_latest_date}) and no new stocks. Skip.")
        return {'count': 0, 'failed_codes': [], 'status': 'skipped', 'record_count': 0, 'changed_files': [], 'message': f'Already up to date ({global_latest_date})'}

    # 4. 增量获取
    if global_latest_date >= last_trade_day:
        logger.info(f"Global date is up to date, but found {len(new_codes)} new stocks. Syncing new stocks only.")
        sync_targets = new_codes
    else:
        logger.info(f"Syncing margin data from {global_latest_date} to {last_trade_day}...")
        sync_targets = stock_targets

    all_data = []
    success_count = 0
    
    with ThreadPoolExecutor(max_workers=get_thread_config_safe()['margin']) as executor:
        futures = {executor.submit(get_stock_margin, t['code']): t['code'] for t in sync_targets}
        for i, future in enumerate(as_completed(futures), 1):
            res = future.result()
            if res is not None and not res.empty:
                code = futures[future]
                if code in existing_codes:
                    res = res[res['trade_date'] > pd.to_datetime(global_latest_date)]
                if not res.empty:
                    all_data.append(res)
                    success_count += 1
            if i % 500 == 0:
                logger.info(f"Margin progress: {i}/{len(sync_targets)}, success: {success_count}")
    
    total_records = 0
    changed_files = []  # 记录变更的文件名
    
    # 5. 按月分表保存
    if all_data:
        new_df = pd.concat(all_data, ignore_index=True)
        total_records = len(new_df)
        
        if not new_df.empty:
            min_date = new_df['trade_date'].min()
            max_date = new_df['trade_date'].max()
            
            current = min_date.to_period('M')
            end_period = max_date.to_period('M')
            
            while current <= end_period:
                yr, mo = current.year, current.month
                month_data = new_df[(new_df['trade_date'].dt.year == yr) & (new_df['trade_date'].dt.month == mo)]
                
                if not month_data.empty:
                    filename = f"{yr}-{mo:02d}.parquet"
                    local_path = margin_dir / filename
                    
                    old_month_df = None
                    if local_path.exists():
                        try:
                            old_month_df = pd.read_parquet(local_path)
                        except Exception:
                            pass
                    
                    if old_month_df is not None:
                        final_month_df = pd.concat([old_month_df, month_data]).drop_duplicates(subset=['code', 'trade_date'])
                    else:
                        final_month_df = month_data
                    
                    final_month_df.to_parquet(local_path)
                    changed_files.append(filename)  # 记录变更的文件
                    logger.info(f"Saved margin data for {filename}")
                
                current += 1
        
        logger.info(f"Margin updated: {len(new_df)} new records")
    
    return {
        'count': success_count,
        'failed_codes': [],
        'status': 'success',
        'record_count': total_records,
        'changed_files': changed_files  # 返回变更文件列表
    }


# ==================== 新增：财务指标数据同步 ====================

def get_stock_financial_indicator(code: str) -> Optional[pd.DataFrame]:
    """获取单只股票财务指标数据"""
    max_retries = 5
    base_delay = 1.0
    for attempt in range(max_retries):
        try:
            df = ak.stock_financial_analysis_indicator(symbol=code)
            if df is not None and not df.empty:
                # 标准化列名
                rename_map = {
                    '日期': 'trade_date',
                    '净资产收益率': 'roe',
                    '总资产净利率': 'roa',
                    '销售毛利率': 'gross_margin',
                    '销售净利率': 'net_margin',
                    '资产负债率': 'debt_ratio',
                    '流动比率': 'current_ratio',
                    '速动比率': 'quick_ratio',
                    '存货周转率': 'inventory_turnover',
                    '应收账款周转率': 'receivable_turnover',
                    '总资产周转率': 'total_asset_turnover',
                }
                df = df.rename(columns=rename_map)
                if 'trade_date' in df.columns:
                    df['trade_date'] = pd.to_datetime(df['trade_date'])
                df['code'] = code
                
                cols = ['code', 'trade_date', 'roe', 'roa', 'gross_margin', 'net_margin',
                        'debt_ratio', 'current_ratio', 'quick_ratio', 
                        'inventory_turnover', 'receivable_turnover', 'total_asset_turnover']
                available_cols = [c for c in cols if c in df.columns]
                return df[available_cols]
        except Exception:
            if attempt > 0:
                delay = base_delay * (2 ** attempt)
                time.sleep(delay)
    return None


def sync_financial_indicator(targets: List[Dict[str, str]]) -> Dict[str, Any]:
    """同步财务指标数据（极致增量版），返回详细结果"""
    logger.info("Syncing financial indicator data...")
    fi_path = Path("/tmp/data/financial_indicator.parquet")
    fi_path.parent.mkdir(parents=True, exist_ok=True)
    
    old_df = None
    old_count = 0
    global_latest_date = "2000-01-01"
    existing_codes = set()

    # 1. 优先读取本地缓存
    if fi_path.exists():
        try:
            old_df = pd.read_parquet(fi_path)
            old_count = len(old_df)
            global_latest_date = old_df['trade_date'].max().strftime('%Y-%m-%d')
            existing_codes = set(old_df['code'].unique())
            logger.info(f"Local financial cache found, latest date: {global_latest_date}, records: {old_count}")
        except Exception as e:
            logger.warning(f"Failed to read local financial cache: {e}")

    # 2. 本地无缓存，尝试从云端拉取
    if old_df is None:
        repo_id = os.getenv("DATASET_REPO_ID")
        if repo_id:
            try:
                old_file = hf_hub_download(repo_id=repo_id, filename="data/financial_indicator.parquet", repo_type="dataset")
                old_df = pd.read_parquet(old_file)
                old_count = len(old_df)
                global_latest_date = old_df['trade_date'].max().strftime('%Y-%m-%d')
                existing_codes = set(old_df['code'].unique())
                old_df.to_parquet(fi_path)
                logger.info(f"Downloaded financial from cloud, latest date: {global_latest_date}, records: {old_count}")
            except Exception:
                logger.info("No existing financial data found in cloud.")

    # 3. 财务指标特殊拦截 + 新股检测
    stock_targets = [t for t in targets if t['market'] not in ['ETF', 'LOF', 'REITs', '可转债']]
    new_codes = [t for t in stock_targets if t['code'] not in existing_codes]
    
    today = get_beijing_time()
    is_recent = False
    if global_latest_date != "2000-01-01":
        days_diff = (today - pd.to_datetime(global_latest_date)).days
        if days_diff < 90:
            is_recent = True

    if is_recent and not new_codes:
        logger.info(f"Financial data is recent ({global_latest_date}) and no new stocks. Skip.")
        return {'count': 0, 'status': 'skipped', 'record_count': old_count, 'new_records': 0, 'message': f'Already up to date ({global_latest_date})'}

    # 4. 增量获取
    if is_recent:
        logger.info(f"Financial data is recent, but found {len(new_codes)} new stocks. Syncing new stocks only.")
        sync_targets = new_codes
    else:
        logger.info(f"Syncing financial indicators (last update: {global_latest_date})...")
        sync_targets = stock_targets

    all_data = []
    success_count = 0
    
    with ThreadPoolExecutor(max_workers=get_thread_config_safe()['financial']) as executor:
        futures = {executor.submit(get_stock_financial_indicator, t['code']): t['code'] for t in sync_targets}
        for i, future in enumerate(as_completed(futures), 1):
            res = future.result()
            if res is not None and not res.empty:
                code = futures[future]
                if code in existing_codes:
                    res = res[res['trade_date'] > pd.to_datetime(global_latest_date)]
                
                if not res.empty:
                    all_data.append(res)
                    success_count += 1
            if i % 500 == 0:
                logger.info(f"Financial indicator progress: {i}/{len(sync_targets)}, success: {success_count}")
    
    # 5. 合并保存
    new_records = 0
    final_count = old_count
    if all_data:
        new_df = pd.concat(all_data, ignore_index=True)
        new_records = len(new_df)
        final_df = pd.concat([old_df, new_df]) if old_df is not None else new_df
        final_df = final_df.drop_duplicates(subset=['code', 'trade_date'])
        final_count = len(final_df)
        final_df.to_parquet(fi_path)
        logger.info(f"Financial updated: {final_count} total records ({new_records} new)")
    
    return {
        'count': success_count,
        'status': 'success' if success_count > 0 or old_count > 0 else 'fail',
        'record_count': final_count,
        'new_records': new_records,
        'previous_count': old_count
    }


# ==================== 新增：股东户数数据同步 ====================

def sync_holder_num() -> Dict[str, Any]:
    """同步股东户数数据（批量获取），返回详细结果"""
    logger.info("Syncing holder number data...")
    
    hn_path = Path("/tmp/data/holder_num.parquet")
    old_count = 0
    
    # 读取现有数据
    if hn_path.exists():
        try:
            old_df = pd.read_parquet(hn_path)
            old_count = len(old_df)
        except Exception:
            pass
    
    try:
        # 获取最近报告期的股东户数数据
        df = ak.stock_zh_a_gdhs(symbol="全部")
        if df is not None and not df.empty:
            # 标准化列名
            rename_map = {
                '代码': 'code',
                '股东户数': 'holder_num',
                '户均持股数量': 'avg_share',
                '户均持股金额': 'avg_value',
                '总股本': 'total_share',
                '总市值': 'total_value',
                '日期': 'trade_date',
            }
            df = df.rename(columns=rename_map)
            if 'trade_date' in df.columns:
                df['trade_date'] = pd.to_datetime(df['trade_date'])
            
            new_count = len(df)
            
            # 保存到 Parquet
            hn_path.parent.mkdir(parents=True, exist_ok=True)
            df.to_parquet(hn_path)
            
            # 判断是否有变化
            is_changed = new_count != old_count
            
            logger.info(f"Holder number data saved: {new_count} records (previous: {old_count}, changed: {is_changed})")
            return {
                'count': new_count,
                'status': 'success',
                'record_count': new_count,
                'previous_count': old_count,
                'is_changed': is_changed
            }
    except Exception as e:
        logger.warning(f"Failed to sync holder number data: {e}")
    
    return {
        'count': 0,
        'status': 'fail',
        'record_count': old_count,
        'previous_count': old_count,
        'is_changed': False
    }


# ==================== 新增：分红数据同步 ====================

def get_stock_dividend(code: str) -> Optional[pd.DataFrame]:
    """获取单只股票分红数据"""
    max_retries = 5
    base_delay = 1.0
    for attempt in range(max_retries):
        try:
            df = ak.stock_history_dividend(symbol=code)
            if df is not None and not df.empty:
                # 标准化列名
                rename_map = {
                    '公告日期': 'trade_date',
                    '分红方案': 'dividend_type',
                    '分红金额': 'dividend_amount',
                    '股权登记日': 'record_date',
                    '除权除息日': 'ex_date',
                    '派息日': 'pay_date',
                }
                df = df.rename(columns=rename_map)
                df['trade_date'] = pd.to_datetime(df['trade_date'])
                df['code'] = code
                
                cols = ['code', 'trade_date', 'dividend_type', 'dividend_amount', 'record_date', 'ex_date', 'pay_date']
                available_cols = [c for c in cols if c in df.columns]
                return df[available_cols]
        except Exception:
            if attempt > 0:
                delay = base_delay * (2 ** attempt)
                time.sleep(delay)
    return None


def sync_dividend(targets: List[Dict[str, str]]) -> Dict[str, Any]:
    """同步分红数据（极致增量版），返回详细结果"""
    logger.info("Syncing dividend data...")
    div_path = Path("/tmp/data/dividend.parquet")
    div_path.parent.mkdir(parents=True, exist_ok=True)
    
    old_df = None
    old_count = 0
    global_latest_date = "2000-01-01"
    existing_codes = set()

    if div_path.exists():
        try:
            old_df = pd.read_parquet(div_path)
            old_count = len(old_df)
            global_latest_date = old_df['trade_date'].max().strftime('%Y-%m-%d')
            existing_codes = set(old_df['code'].unique())
        except Exception: pass

    if old_df is None:
        repo_id = os.getenv("DATASET_REPO_ID")
        if repo_id:
            try:
                old_file = hf_hub_download(repo_id=repo_id, filename="data/dividend.parquet", repo_type="dataset")
                old_df = pd.read_parquet(old_file)
                old_count = len(old_df)
                global_latest_date = old_df['trade_date'].max().strftime('%Y-%m-%d')
                existing_codes = set(old_df['code'].unique())
                old_df.to_parquet(div_path)
            except Exception: pass

    # 90天检查一次 + 新股检测
    stock_targets = [t for t in targets if t['market'] not in ['ETF', 'LOF', 'REITs', '可转债']]
    new_codes = [t for t in stock_targets if t['code'] not in existing_codes]

    today = get_beijing_time()
    is_recent = False
    if global_latest_date != "2000-01-01":
        if (today - pd.to_datetime(global_latest_date)).days < 90:
            is_recent = True

    if is_recent and not new_codes:
        logger.info(f"Dividend data is recent and no new stocks. Skip.")
        return {'count': 0, 'status': 'skipped', 'record_count': old_count, 'new_records': 0, 'message': f'Already up to date ({global_latest_date})'}

    # 4. 增量获取
    if is_recent:
        logger.info(f"Dividend data is recent, but found {len(new_codes)} new stocks. Syncing new stocks only.")
        sync_targets = new_codes
    else:
        logger.info(f"Syncing dividend data (last update: {global_latest_date})...")
        sync_targets = stock_targets
    
    all_data = []
    success_count = 0
    
    with ThreadPoolExecutor(max_workers=get_thread_config_safe()['dividend']) as executor:
        futures = {executor.submit(get_stock_dividend, t['code']): t['code'] for t in sync_targets}
        for i, future in enumerate(as_completed(futures), 1):
            res = future.result()
            if res is not None and not res.empty:
                code = futures[future]
                if code in existing_codes:
                    res = res[res['trade_date'] > pd.to_datetime(global_latest_date)]

                if not res.empty:
                    all_data.append(res)
                    success_count += 1
            if i % 500 == 0:
                logger.info(f"Dividend progress: {i}/{len(sync_targets)}, success: {success_count}")
    
    new_records = 0
    final_count = old_count
    if all_data:
        new_df = pd.concat(all_data, ignore_index=True)
        new_records = len(new_df)
        final_df = pd.concat([old_df, new_df]) if old_df is not None else new_df
        final_df = final_df.drop_duplicates(subset=['code', 'trade_date', 'dividend_type'])
        final_count = len(final_df)
        final_df.to_parquet(div_path)
        logger.info(f"Dividend updated: {final_count} total records ({new_records} new)")
    
    return {
        'count': success_count,
        'status': 'success' if success_count > 0 or old_count > 0 else 'fail',
        'record_count': final_count,
        'new_records': new_records,
        'previous_count': old_count
    }


# ==================== 新增：十大股东数据同步 ====================

def sync_top_holders() -> Dict[str, Any]:
    """同步十大股东数据（批量获取），返回详细结果"""
    logger.info("Syncing top holders data...")
    
    path = Path("/tmp/data/top_holders.parquet")
    old_count = 0
    
    # 读取现有数据
    if path.exists():
        try:
            old_df = pd.read_parquet(path)
            old_count = len(old_df)
        except Exception:
            pass
    
    try:
        today = get_beijing_time()
        df = ak.stock_gdfx_holding_analyse_em(date=today.strftime('%Y%m%d'))
        if df is not None and not df.empty:
            rename_map = {
                '股票代码': 'code',
                '公告日期': 'trade_date',
                '股东名称': 'holder_name',
                '持股数量': 'hold_num',
                '持股比例': 'hold_ratio',
                '持股变动': 'hold_change',
            }
            df = df.rename(columns=rename_map)
            df['trade_date'] = pd.to_datetime(df['trade_date'])
            
            new_count = len(df)
            
            path.parent.mkdir(parents=True, exist_ok=True)
            df.to_parquet(path)
            
            # 判断是否有变化
            is_changed = new_count != old_count
            
            logger.info(f"Top holders data saved: {new_count} records (previous: {old_count}, changed: {is_changed})")
            return {
                'count': new_count,
                'status': 'success',
                'record_count': new_count,
                'previous_count': old_count,
                'is_changed': is_changed
            }
    except Exception as e:
        logger.warning(f"Failed to sync top holders: {e}")
    
    return {
        'count': 0,
        'status': 'fail',
        'record_count': old_count,
        'previous_count': old_count,
        'is_changed': False
    }


# ==================== 新增：限售解禁数据同步 ====================

def sync_restricted_unlock() -> Dict[str, Any]:
    """同步限售解禁数据（批量获取），返回详细结果"""
    logger.info("Syncing restricted unlock data...")
    path = Path("/tmp/data/restricted_unlock.parquet")
    path.parent.mkdir(parents=True, exist_ok=True)
    
    old_count = 0
    
    # 读取现有数据
    if path.exists():
        try:
            old_df = pd.read_parquet(path)
            old_count = len(old_df)
        except Exception:
            pass
    
    try:
        # 获取全市场限售解禁数据
        df = ak.stock_restricted_shares(stock="all")
        if df is not None and not df.empty:
            rename_map = {
                '代码': 'code',
                '名称': 'name',
                '解禁日期': 'unlock_date',
                '解禁数量': 'unlock_num',
                '解禁股本占总股本比例': 'unlock_ratio',
            }
            df = df.rename(columns=rename_map)
            df['unlock_date'] = pd.to_datetime(df['unlock_date'])
            df['trade_date'] = get_beijing_time() # 记录同步日期
            
            new_count = len(df)
            
            df.to_parquet(path)
            
            # 判断是否有变化
            is_changed = new_count != old_count
            
            logger.info(f"Restricted unlock data saved: {new_count} records (previous: {old_count}, changed: {is_changed})")
            return {
                'count': new_count,
                'status': 'success',
                'record_count': new_count,
                'previous_count': old_count,
                'is_changed': is_changed
            }
    except Exception as e:
        logger.warning(f"Failed to sync restricted unlock: {e}")
    
    return {
        'count': 0,
        'status': 'fail',
        'record_count': old_count,
        'previous_count': old_count,
        'is_changed': False
    }


def main() -> int:
    """

    主函数 - 执行完整的数据同步流程（每类指标完成后即时上传，并记录状态）

    

    Returns:

        int: 退出码，0 表示成功，1 表示失败

    """
    logger.info("=" * 60)
    logger.info("Stock Data Sync Started")
    logger.info("=" * 60)
    
    try:
        # 初始化线程配置
        init_thread_config()
        
        db = get_db()
        db.init_db()
        
        # 获取状态管理器
        status = get_sync_status()
        
        # 获取最后交易日
        last_day = get_last_trading_day()
        logger.info(f"Last trading day: {last_day}")
        
        # 1. 列表同步
        logger.info("-" * 40)
        logger.info("Syncing stock list...")
        target_list = get_stock_list()
        list_parquet = Path("/tmp/data/stock_list.parquet")
        list_parquet.parent.mkdir(parents=True, exist_ok=True)
        target_list.to_parquet(list_parquet)
        db.upload_indicator("Stock List", list_parquet, "data")
        status.update('stock_list', 
                     last_trade_date=last_day,
                     record_count=len(target_list),
                     status='success')

        # 2. 行情同步
        logger.info("-" * 40)
        logger.info("Syncing daily data...")
        daily_result = sync_stock_daily(target_list.to_dict('records'), last_day)
        # 智能上传日K行情数据（根据变更文件数量选择策略）
        parquet_dir = Path("/tmp/data/parquet")
        if parquet_dir.exists() and any(parquet_dir.glob("*.parquet")):
            db.upload_indicator_smart("Daily Data", parquet_dir, "data/parquet",
                                     daily_result.get('changed_files', []))
        status.update('daily',
                     last_trade_date=last_day,
                     record_count=daily_result.get('record_count', 0),
                     status=daily_result.get('status', 'unknown'),
                     failed_codes=daily_result.get('failed_codes', []),
                     success_rate=daily_result.get('success_rate', 0),
                     message=daily_result.get('message', ''))
        
        # 3. 指数同步
        logger.info("-" * 40)
        logger.info("Syncing index data...")
        idx_df = get_index_daily('000300')
        idx_count = 0
        if idx_df is not None:
            idx_path = Path("/tmp/data/parquet/index_000300.parquet")
            idx_path.parent.mkdir(parents=True, exist_ok=True)
            idx_df.to_parquet(idx_path)
            db.upload_indicator("Index Data", idx_path, "data/parquet")
            idx_count = len(idx_df)
        status.update('index',
                     last_trade_date=last_day,
                     record_count=idx_count,
                     status='success' if idx_count > 0 else 'fail')

        # 4. 资金流向同步
        logger.info("-" * 40)
        logger.info("Syncing fund flow...")
        fund_flow_result = sync_fund_flow(target_list.to_dict('records'), last_day)
        # 智能上传资金流向数据（根据变更文件数量选择策略）
        fund_flow_dir = Path("/tmp/data/fund_flow")
        if fund_flow_dir.exists() and any(fund_flow_dir.glob("*.parquet")):
            db.upload_indicator_smart("Fund Flow", fund_flow_dir, "data/fund_flow",
                                     fund_flow_result.get('changed_files', []))
        status.update('fund_flow',
                     last_trade_date=last_day,
                     record_count=fund_flow_result.get('record_count', 0),
                     status=fund_flow_result.get('status', 'unknown'),
                     failed_codes=fund_flow_result.get('failed_codes', []),
                     success_rate=fund_flow_result.get('success_rate', 0),
                     message=fund_flow_result.get('message', ''))
        
        # 5. 估值指标同步
        logger.info("-" * 40)
        logger.info("Syncing valuation...")
        valuation_result = sync_valuation(target_list.to_dict('records'), last_day)
        # 智能上传估值指标数据（根据变更文件数量选择策略）
        valuation_dir = Path("/tmp/data/valuation")
        if valuation_dir.exists() and any(valuation_dir.glob("*.parquet")):
            db.upload_indicator_smart("Valuation", valuation_dir, "data/valuation",
                                     valuation_result.get('changed_files', []))
        status.update('valuation',
                     last_trade_date=last_day,
                     record_count=valuation_result.get('record_count', 0),
                     status=valuation_result.get('status', 'success'))
        
        # 6. 融资融券同步
        logger.info("-" * 40)
        logger.info("Syncing margin...")
        margin_result = sync_margin(target_list.to_dict('records'), last_day)
        # 智能上传融资融券数据（根据变更文件数量选择策略）
        margin_dir = Path("/tmp/data/margin")
        if margin_dir.exists() and any(margin_dir.glob("*.parquet")):
            db.upload_indicator_smart("Margin", margin_dir, "data/margin",
                                     margin_result.get('changed_files', []))
        status.update('margin',
                     last_trade_date=last_day,
                     record_count=margin_result.get('record_count', 0),
                     status=margin_result.get('status', 'success'))
        
        # 7. 财务指标同步
        logger.info("-" * 40)
        logger.info("Syncing financial indicator...")
        financial_result = sync_financial_indicator(target_list.to_dict('records'))
        fi_path = Path("/tmp/data/financial_indicator.parquet")
        if fi_path.exists() and financial_result.get('new_records', 0) > 0:
            upload_success = db.upload_indicator("Financial Indicator", fi_path, "data")
            financial_status = 'success' if upload_success else 'upload_fail'
        else:
            financial_status = financial_result.get('status', 'skipped')
        status.update('financial',
                     last_trade_date=last_day,
                     record_count=financial_result.get('record_count', 0),
                     status=financial_status,
                     new_records=financial_result.get('new_records', 0))
        
        # 8. 股东户数同步
        logger.info("-" * 40)
        logger.info("Syncing holder num...")
        holder_result = sync_holder_num()
        holder_path = Path("/tmp/data/holder_num.parquet")
        if holder_result.get('is_changed', False) and holder_path.exists():
            upload_success = db.upload_indicator("Holder Num", holder_path, "data")
            holder_status = 'success' if upload_success else 'upload_fail'
        else:
            holder_status = 'skipped' if not holder_result.get('is_changed', False) else holder_result.get('status', 'fail')
        status.update('holder_num',
                     last_trade_date=last_day,
                     record_count=holder_result.get('record_count', 0),
                     status=holder_status,
                     is_changed=holder_result.get('is_changed', False))
        
        # 9. 分红数据同步
        logger.info("-" * 40)
        logger.info("Syncing dividend...")
        dividend_result = sync_dividend(target_list.to_dict('records'))
        div_path = Path("/tmp/data/dividend.parquet")
        if div_path.exists() and dividend_result.get('new_records', 0) > 0:
            upload_success = db.upload_indicator("Dividend", div_path, "data")
            dividend_status = 'success' if upload_success else 'upload_fail'
        else:
            dividend_status = dividend_result.get('status', 'skipped')
        status.update('dividend',
                     last_trade_date=last_day,
                     record_count=dividend_result.get('record_count', 0),
                     status=dividend_status,
                     new_records=dividend_result.get('new_records', 0))
        
        # 10. 十大股东同步
        logger.info("-" * 40)
        logger.info("Syncing top holders...")
        top_holders_result = sync_top_holders()
        top_holders_path = Path("/tmp/data/top_holders.parquet")
        if top_holders_result.get('is_changed', False) and top_holders_path.exists():
            upload_success = db.upload_indicator("Top Holders", top_holders_path, "data")
            top_holders_status = 'success' if upload_success else 'upload_fail'
        else:
            top_holders_status = 'skipped' if not top_holders_result.get('is_changed', False) else top_holders_result.get('status', 'fail')
        status.update('top_holders',
                     last_trade_date=last_day,
                     record_count=top_holders_result.get('record_count', 0),
                     status=top_holders_status,
                     is_changed=top_holders_result.get('is_changed', False))

        # 11. 限售解禁同步
        logger.info("-" * 40)
        logger.info("Syncing restricted unlock...")
        restricted_result = sync_restricted_unlock()
        restricted_path = Path("/tmp/data/restricted_unlock.parquet")
        if restricted_result.get('is_changed', False) and restricted_path.exists():
            upload_success = db.upload_indicator("Restricted Unlock", restricted_path, "data")
            restricted_status = 'success' if upload_success else 'upload_fail'
        else:
            restricted_status = 'skipped' if not restricted_result.get('is_changed', False) else restricted_result.get('status', 'fail')
        status.update('restricted_unlock',
                     last_trade_date=last_day,
                     record_count=restricted_result.get('record_count', 0),
                     status=restricted_status,
                     is_changed=restricted_result.get('is_changed', False))
        
        # 12. 上传状态文件
        logger.info("-" * 40)
        logger.info("Uploading sync status...")
        status_path = Path("/tmp/data/sync_status.json")
        status_upload_success = db.upload_indicator("Sync Status", status_path, "data")
        if not status_upload_success:
            logger.warning("Failed to upload sync status file")
        
        logger.info("=" * 60)
        logger.info("Sync Completed Successfully!")
        summary = (f"Daily={daily_result.get('count', 0)}, FundFlow={fund_flow_result.get('count', 0)}, "
                   f"Valuation={valuation_result.get('count', 0)}, Margin={margin_result.get('count', 0)}, Financial={financial_result.get('count', 0)}, "
                   f"Holder={holder_result.get('count', 0)}, Dividend={dividend_result.get('count', 0)}, "
                   f"TopHolders={top_holders_result.get('count', 0)}, Restricted={restricted_result.get('count', 0)}")
        logger.info(f"Summary: {summary}")
        logger.info("=" * 60)
        return 0
        
    except Exception as e:
        logger.error(f"Sync failed with error: {e}")
        return 1

if __name__ == "__main__":
    sys.exit(main())