""" 超峰海外代理 (Chaofeng Overseas Proxy) HuggingFace Space — FastAPI proxy for accessing overseas resources from China. Capabilities: 1. US stock quotes/klines (Yahoo Finance) 2. Global financial news (Finnhub) 3. File/model download proxy (HuggingFace, GitHub, etc.) 4. General HTTP fetch proxy for blocked sites Used by: 金融家 (JinRongJia), Narnia, and other projects needing overseas access. """ import hashlib import io import logging import os import tempfile import time from typing import Optional from urllib.parse import urlparse import requests from fastapi import FastAPI, Query, Request from fastapi.middleware.cors import CORSMiddleware from fastapi.responses import JSONResponse, StreamingResponse, Response import yfinance as yf logging.basicConfig(level=logging.INFO) logger = logging.getLogger("chaofeng") app = FastAPI(title="超峰海外代理", version="0.2.2") app.add_middleware( CORSMiddleware, allow_origins=["*"], allow_methods=["*"], allow_headers=["*"], ) TICKER_CACHE: dict[str, yf.Ticker] = {} # Safety: only proxy to known-safe domains by default ALLOWED_DOMAINS = { "huggingface.co", "hf-mirror.com", "cdn-lfs.hf.co", "cdn-lfs-us-1.hf.co", "github.com", "api.github.com", "raw.githubusercontent.com", "github-releases.githubusercontent.com", "objects.githubusercontent.com", "pypi.org", "files.pythonhosted.org", "storage.googleapis.com", "query1.finance.yahoo.com", "query2.finance.yahoo.com", "fc.yahoo.com", "finance.yahoo.com", "finnhub.io", "arxiv.org", } # Max file size for downloads (500MB) MAX_DOWNLOAD_SIZE = 500 * 1024 * 1024 def _get_ticker(symbol: str) -> yf.Ticker: if symbol not in TICKER_CACHE: TICKER_CACHE[symbol] = yf.Ticker(symbol) return TICKER_CACHE[symbol] def _is_allowed(url: str) -> bool: """Check if URL domain is in the allowlist.""" try: host = urlparse(url).hostname if host is None: return False return any( host == allowed or host.endswith("." + allowed) for allowed in ALLOWED_DOMAINS ) except Exception: return False # ═══════════════════════════════════════════════════════════════ # Health # ═══════════════════════════════════════════════════════════════ @app.get("/") def health(): return { "status": "ok", "service": "chaofeng-overseas-proxy", "version": "0.2.2", "endpoints": [ "GET /api/us/quotes?symbols=AAPL,TSLA", "GET /api/us/klines/{symbol}?period=day&count=250", "GET /api/us/news/{symbol}?limit=20", "GET /api/us/news/bulk?symbols=AAPL,QCOM&limit=10", "GET /api/us/earnings/{symbol}", "GET /api/news/global?limit=50", "GET /api/proxy/fetch?url=...", "GET /api/proxy/download?url=...&filename=...", ], } # ═══════════════════════════════════════════════════════════════ # US Stock Quotes # ═══════════════════════════════════════════════════════════════ @app.get("/api/us/quotes") def us_quotes(symbols: str = Query(..., description="Comma-separated ticker symbols")): ticker_list = [s.strip() for s in symbols.split(",") if s.strip()] if not ticker_list: return JSONResponse({"error": "No symbols provided"}, status_code=400) results = [] for symbol in ticker_list: try: t = _get_ticker(symbol) info = t.fast_info price = float(info.get("lastPrice", 0) or 0) prev_close = float(info.get("previousClose", 0) or 0) change = price - prev_close if price and prev_close else 0 change_pct = (change / prev_close * 100) if prev_close else 0 results.append({ "symbol": symbol, "price": price, "open": float(info.get("open", 0) or 0), "high": float(info.get("dayHigh", 0) or 0), "low": float(info.get("dayLow", 0) or 0), "pre_close": prev_close, "change": round(change, 4), "change_percent": round(change_pct, 2), "volume": int(info.get("lastVolume", 0) or 0), }) except Exception as e: logger.warning(f"Quote fetch failed for {symbol}: {e}") results.append({"symbol": symbol, "error": str(e)}) return {"count": len(results), "quotes": results} # ═══════════════════════════════════════════════════════════════ # US Stock Klines # ═══════════════════════════════════════════════════════════════ @app.get("/api/us/klines/{symbol}") def us_klines( symbol: str, period: str = Query("day"), count: int = Query(250), ): interval_map = {"day": "1d", "week": "1wk", "month": "1mo"} interval = interval_map.get(period, "1d") if count <= 5: range_str = "5d" elif count <= 30: range_str = "1mo" elif count <= 90: range_str = "3mo" elif count <= 180: range_str = "6mo" elif count <= 365: range_str = "1y" else: range_str = "2y" try: t = _get_ticker(symbol) df = t.history(period=range_str, interval=interval) if df.empty: return {"symbol": symbol, "period": period, "count": 0, "klines": []} results = [] for idx, row in df.iterrows(): results.append({ "timestamp": int(idx.timestamp() * 1000), "open": float(row["Open"]), "high": float(row["High"]), "low": float(row["Low"]), "close": float(row["Close"]), "volume": int(row["Volume"]), }) if count and len(results) > count: results = results[-count:] return {"symbol": symbol, "period": period, "count": len(results), "klines": results} except Exception as e: logger.warning(f"Kline fetch failed for {symbol}: {e}") return JSONResponse({"symbol": symbol, "error": str(e)}, status_code=500) # ═══════════════════════════════════════════════════════════════ # Global Financial News # ═══════════════════════════════════════════════════════════════ @app.get("/api/news/global") def global_news( limit: int = Query(50, ge=1, le=100), category: str = Query("general"), ): articles = [] # Try Finnhub first try: url = "https://finnhub.io/api/v1/news" params = {"category": category} resp = requests.get(url, params=params, timeout=10) resp.raise_for_status() for item in resp.json()[:limit]: articles.append({ "id": f"finnhub_{item.get('id', '')}", "source": "finnhub", "title": item.get("headline", ""), "content": item.get("summary", ""), "url": item.get("url", ""), "publish_time": item.get("datetime", 0) * 1000, "related_symbols": item.get("related", ""), "category": item.get("category", category), }) except Exception as e: logger.warning(f"Finnhub news failed: {e}") # Fallback: yfinance news for major US tickers if not articles: articles = _fetch_yf_news(["AAPL", "MSFT", "GOOGL", "AMZN", "NVDA", "META", "TSLA"], limit) return {"count": len(articles), "articles": articles} @app.get("/api/us/news/bulk") def us_stock_news_bulk( symbols: str = Query(..., description="Comma-separated US stock symbols"), limit: int = Query(10, ge=1, le=30), ): """Get recent news for multiple US stocks in one request.""" syms = [s.strip() for s in symbols.split(",") if s.strip()] articles = _fetch_yf_news(syms, limit) return {"symbols": symbols, "count": len(articles), "articles": articles} @app.get("/api/us/news/{symbol}") def us_stock_news( symbol: str, limit: int = Query(20, ge=1, le=50), ): """Get recent news for a US stock via yfinance.""" articles = _fetch_yf_news([symbol], limit) return {"symbol": symbol, "count": len(articles), "articles": articles} def _fetch_yf_news(symbols: list[str], limit: int) -> list[dict]: """Fetch news for given symbols via yfinance.""" from datetime import datetime, timezone articles = [] seen_ids = set() for sym in symbols: try: ticker = _get_ticker(sym) raw_news = ticker.news if not raw_news: continue for item in raw_news[:limit]: # yfinance format: {'id': '...', 'content': {title, summary, pubDate, canonicalUrl, relatedTickers, ...}} inner = item.get("content", item) # fallback: use item directly title = inner.get("title", "") desc = inner.get("summary", "") or inner.get("description", "") url = "" curl = inner.get("canonicalUrl") if isinstance(curl, dict): url = curl.get("url", "") art_id = item.get("id", "") or hashlib.md5((url + title).encode()).hexdigest() if art_id in seen_ids: continue seen_ids.add(art_id) # Parse pubDate (ISO 8601 string like "2026-05-07T21:04:15Z") pub_date = inner.get("pubDate", "") pub_ms = 0 if pub_date: try: pub_ms = int(datetime.fromisoformat(pub_date.replace("Z", "+00:00")).timestamp() * 1000) except Exception: pass related = inner.get("relatedTickers", []) if isinstance(related, list): related = ",".join(related) articles.append({ "id": f"yf_{art_id[:16]}", "source": "yahoo_finance", "title": title, "content": desc, "url": url, "publish_time": pub_ms, "related_symbols": str(related) if related else "", "category": inner.get("contentType", "news"), }) except Exception as e: logger.warning(f"yfinance news for {sym} failed: {e}") return articles[:limit * len(symbols)] # ═══════════════════════════════════════════════════════════════ # Earnings / Financial Reports # ═══════════════════════════════════════════════════════════════ @app.get("/api/us/earnings/{symbol}") def us_earnings(symbol: str): """Get quarterly earnings data for a US stock via yfinance.""" try: t = _get_ticker(symbol) quarterly = [] debug = {} # Method 1: earnings_history (quoteSummary API) with retry for attempt in range(3): try: hist = t.earnings_history debug["hist_type"] = str(type(hist)) if hist is not None and not hist.empty: debug["hist_len"] = len(hist) debug["hist_cols"] = list(hist.columns) for idx, row in hist.iterrows(): quarterly.append({ "quarter": str(idx), "eps_estimate": float(row.get("epsEstimate", row.get("EPS Estimate", 0)) or 0), "reported_eps": float(row.get("epsActual", row.get("Reported EPS", 0)) or 0), "surprise_pct": round(float(row.get("surprisePercent", row.get("Surprise(%)", 0)) or 0) * 100, 2), }) break # success else: debug["hist_empty_attempt_%d" % attempt] = True except Exception as e: debug["hist_error_attempt_%d" % attempt] = str(e) if "rate limit" in str(e).lower() and attempt < 2: time.sleep(2 * (attempt + 1)) continue # Method 2: quarterly_earnings (income statement data) if not quarterly: try: qe = t.quarterly_earnings debug["qe_type"] = str(type(qe)) if qe is not None and hasattr(qe, "empty") and not qe.empty: debug["qe_len"] = len(qe) debug["qe_cols"] = list(qe.columns) if hasattr(qe, "columns") else "no_cols" for idx, row in qe.iterrows(): quarterly.append({ "quarter": str(idx), "eps_estimate": 0.0, "reported_eps": float(row.get("Diluted EPS", 0) or 0), "surprise_pct": 0.0, }) except Exception as e: debug["qe_error"] = str(e) # Method 3: earnings_dates (scraper — last resort) if not quarterly: try: df = t.get_earnings_dates(limit=12) debug["df_type"] = str(type(df)) if df is not None and not df.empty: debug["df_len"] = len(df) debug["df_cols"] = list(df.columns) for idx, row in df.iterrows(): row_dict = row.to_dict() if hasattr(row, "to_dict") else dict(row) quarterly.append({ "quarter": str(idx), "eps_estimate": float(row_dict.get("EPS Estimate", 0) or 0), "reported_eps": float(row_dict.get("Reported EPS", 0) or 0), "surprise_pct": float(row_dict.get("Surprise(%)", 0) or 0), }) except Exception as e: debug["df_error"] = str(e) debug["df_trace"] = repr(e) # Best-effort: upcoming earnings calendar next_ed = None rev_avg = None rev_low = None rev_high = None try: cal = t.calendar if cal and isinstance(cal, dict): ed = cal.get("Earnings Date") if isinstance(ed, list) and ed: first = ed[0] if hasattr(first, "isoformat"): next_ed = first.isoformat() elif isinstance(first, str): next_ed = first rev_avg = cal.get("Revenue Average") rev_low = cal.get("Revenue Low") rev_high = cal.get("Revenue High") except Exception: pass return { "symbol": symbol, "quarterly_earnings": quarterly, "next_earnings_date": next_ed, "revenue_estimate_avg": float(rev_avg) if rev_avg is not None else None, "revenue_estimate_low": float(rev_low) if rev_low is not None else None, "revenue_estimate_high": float(rev_high) if rev_high is not None else None, "debug": debug, } except Exception as e: logger.exception(f"Earnings fetch failed for {symbol}") return JSONResponse({"symbol": symbol, "error": str(e), "trace": repr(e)}, status_code=500) # ═══════════════════════════════════════════════════════════════ # General-purpose HTTP fetch proxy # ═══════════════════════════════════════════════════════════════ @app.get("/api/proxy/fetch") def proxy_fetch( url: str = Query(..., description="Target URL to fetch"), timeout: int = Query(30, ge=5, le=120), ): """ Fetch content from an overseas URL and return it. For JSON APIs, returns parsed JSON. For HTML/text, returns raw content. Domain must be in the allowlist. """ if not _is_allowed(url): return JSONResponse({ "error": f"Domain not allowed: {urlparse(url).hostname}", "allowed_domains": sorted(ALLOWED_DOMAINS), }, status_code=403) try: resp = requests.get( url, headers={"User-Agent": "Mozilla/5.0 (compatible; chaofeng-proxy/0.2)"}, timeout=timeout, allow_redirects=True, ) resp.raise_for_status() content_type = resp.headers.get("content-type", "") # Return JSON directly if "json" in content_type: return JSONResponse(resp.json()) # Return text if any(t in content_type for t in ("text/", "application/xml", "application/javascript")): return Response( content=resp.text, media_type=content_type, headers={"X-Proxied-Url": url}, ) # Binary — return as base64 with metadata if len(resp.content) > 10 * 1024 * 1024: return JSONResponse({ "url": url, "content_type": content_type, "size": len(resp.content), "note": "Binary content too large (>10MB). Use /api/proxy/download instead.", }) import base64 return JSONResponse({ "url": url, "content_type": content_type, "size": len(resp.content), "data_base64": base64.b64encode(resp.content).decode("ascii"), }) except requests.Timeout: return JSONResponse({"error": f"Request timed out after {timeout}s"}, status_code=504) except Exception as e: return JSONResponse({"error": str(e)}, status_code=502) # ═══════════════════════════════════════════════════════════════ # File / Model download proxy (streaming) # ═══════════════════════════════════════════════════════════════ @app.get("/api/proxy/download") def proxy_download( url: str = Query(..., description="File URL to download"), filename: str = Query("", description="Optional filename override"), ): """ Stream-download a file from an overseas URL. Supports large files (models, datasets) up to 500MB. Domain must be in the allowlist. """ if not _is_allowed(url): return JSONResponse({ "error": f"Domain not allowed: {urlparse(url).hostname}", }, status_code=403) try: resp = requests.get( url, headers={"User-Agent": "Mozilla/5.0 (compatible; chaofeng-proxy/0.2)"}, stream=True, timeout=30, allow_redirects=True, ) resp.raise_for_status() content_length = resp.headers.get("content-length") if content_length and int(content_length) > MAX_DOWNLOAD_SIZE: return JSONResponse({ "error": f"File too large ({int(content_length)} bytes). Max: {MAX_DOWNLOAD_SIZE}", }, status_code=413) # Determine filename if not filename: disposition = resp.headers.get("content-disposition", "") if "filename=" in disposition: filename = disposition.split("filename=")[-1].strip('"\' ') else: filename = os.path.basename(urlparse(url).path) or "download" content_type = resp.headers.get("content-type", "application/octet-stream") def iter_chunks(): for chunk in resp.iter_content(chunk_size=8192): yield chunk return StreamingResponse( iter_chunks(), media_type=content_type, headers={ "Content-Disposition": f'attachment; filename="{filename}"', "X-Proxied-Url": url, "X-Content-Length": content_length or "unknown", }, ) except requests.Timeout: return JSONResponse({"error": "Download timed out"}, status_code=504) except Exception as e: return JSONResponse({"error": str(e)}, status_code=502) # ═══════════════════════════════════════════════════════════════ # HuggingFace model download helper # ═══════════════════════════════════════════════════════════════ @app.get("/api/proxy/hf-info/{repo_id:path}") def hf_model_info(repo_id: str): """ Get HuggingFace model/repo info (files, sizes). Useful for checking model availability before downloading. Example: /api/proxy/hf-info/google/mobilebert-uncased """ try: # Get repo info from HF API resp = requests.get( f"https://huggingface.co/api/models/{repo_id}", timeout=15, ) if resp.status_code == 404: return JSONResponse({"error": f"Repo not found: {repo_id}"}, status_code=404) resp.raise_for_status() data = resp.json() # List files files = [] siblings = data.get("siblings", []) for sib in siblings: files.append({ "filename": sib.get("rfilename"), "size": sib.get("size", 0), }) return { "repo_id": repo_id, "model_id": data.get("modelId", repo_id), "pipeline_tag": data.get("pipeline_tag"), "tags": data.get("tags", []), "files": sorted(files, key=lambda f: f["size"], reverse=True), "total_files": len(files), } except Exception as e: return JSONResponse({"error": str(e)}, status_code=502) # ═══════════════════════════════════════════════════════════════ # HuggingFace API relay (for hf upload / hf CLI through GFW) # ═══════════════════════════════════════════════════════════════ @app.api_route("/api/hf-proxy/{path:path}", methods=["GET", "POST", "PUT", "DELETE", "PATCH"]) async def hf_api_proxy(path: str, request: Request): """ Generic HuggingFace API proxy. Forwards any request to https://huggingface.co/api/{path}. Used by hf CLI when HF_ENDPOINT is set to this proxy. """ HF_API_BASE = "https://huggingface.co" # HF_ENDPOINT is set to https://chaofenghui-chaofeng.hf.space/api/hf-proxy # The hf CLI replaces https://huggingface.co with HF_ENDPOINT, so: # api/models → hf.co/api/models # spaces/X.git/info/lfs/... → hf.co/spaces/X.git/info/lfs/... # Just forward the raw path to huggingface.co target_url = f"{HF_API_BASE}/{path}" if request.url.query: target_url += f"?{request.url.query}" headers = dict(request.headers) headers.pop("host", None) headers.pop("content-length", None) body = await request.body() try: resp = requests.request( method=request.method, url=target_url, headers=headers, data=body if body else None, timeout=120, allow_redirects=True, ) # Forward all response headers from HF API response_headers = {} for key, value in resp.headers.items(): if key.lower() not in ("transfer-encoding", "content-encoding", "content-length"): response_headers[key] = value return Response( content=resp.content, status_code=resp.status_code, headers=response_headers, ) except Exception as e: logger.error(f"HF proxy error: {e}") return JSONResponse({"error": str(e)}, status_code=502) # ═══════════════════════════════════════════════════════════════ # Short Track Dashboard API Proxy # Forwards requests from WeChat Mini Program → Flask master server # WeChat requires HTTPS, so we proxy through this HF Space # ═══════════════════════════════════════════════════════════════ ST_API_BASE = "http://122.51.80.140:5000" @app.api_route("/api/st-proxy/{path:path}", methods=["GET", "POST", "PUT", "DELETE", "PATCH", "OPTIONS"]) async def st_api_proxy(path: str, request: Request): """Forward API requests to the short track dashboard master server.""" target_url = f"{ST_API_BASE}/api/{path}" if request.url.query: target_url += f"?{request.url.query}" headers = dict(request.headers) headers.pop("host", None) headers.pop("content-length", None) # Preserve the original Content-Type (important for POST JSON) if "content-type" not in {k.lower() for k in headers}: headers["content-type"] = request.headers.get("content-type", "application/json") body = await request.body() try: resp = requests.request( method=request.method, url=target_url, headers=headers, data=body if body else None, timeout=30, allow_redirects=False, ) # Build response, preserving headers the client needs response_headers = {} for key, value in resp.headers.items(): if key.lower() not in ("transfer-encoding", "content-encoding", "content-length"): response_headers[key] = value response_headers["access-control-allow-origin"] = "*" return Response( content=resp.content, status_code=resp.status_code, headers=response_headers, ) except Exception as e: logger.error(f"ST proxy error: {e}") return JSONResponse({"error": str(e)}, status_code=502)