Spaces:
Sleeping
Sleeping
| from flask import Flask, send_from_directory, request, jsonify | |
| import requests | |
| import os | |
| import json | |
| import base64 | |
| from datetime import datetime, timezone | |
# Flask application object; its static folder is the '.' directory.
app = Flask(__name__, static_folder='.')

# Runtime configuration is read from the environment; each value defaults to "".
TAVILY_KEY = os.getenv("TAVILY_KEY", "")
HF_TOKEN = os.getenv("HF_TOKEN", "")
HF_DATASET = os.getenv("HF_DATASET", "")  # e.g. "AnesNT/surfgo-cache"
# ─── HuggingFace Dataset helpers ─────────────────────────────────────────────
def hf_headers():
    """Return JSON request headers carrying HF_TOKEN as a bearer token."""
    bearer = f"Bearer {HF_TOKEN}"
    return {"Authorization": bearer, "Content-Type": "application/json"}
def hf_get_cache():
    """Download cache.json from the HF dataset and return it as a dict.

    Returns:
        The parsed cache, or an empty dict when cache.json does not exist
        yet (HTTP 404) or its top-level JSON value is not an object.

    Raises:
        requests.HTTPError: for any other 4xx/5xx response (auth failure,
            rate limit, server error). Treating those as "empty cache" would
            let a later read-modify-write upload replace the stored cache
            with a nearly empty one.
        requests.RequestException: on network errors or timeouts.
        ValueError: if a 200 response body is not valid JSON.
    """
    url = f"https://huggingface.co/datasets/{HF_DATASET}/resolve/main/cache.json"
    r = requests.get(url, headers={"Authorization": f"Bearer {HF_TOKEN}"}, timeout=10)
    if r.status_code == 200:
        data = r.json()
        return data if isinstance(data, dict) else {}
    if r.status_code == 404:
        # No cache file has been uploaded yet: a genuinely empty cache.
        return {}
    # Anything else is a failure, not an empty cache.
    r.raise_for_status()
    return {}
def hf_put_cache(cache: dict):
    """Upload the given cache dict as cache.json to the HF dataset.

    The cache is serialized to indented JSON, base64-encoded and sent with a
    PUT request. If the repository tree listing contains cache.json, its
    ``oid`` is sent along as ``sha``.

    Args:
        cache: The complete cache mapping to store; it replaces the file.

    Raises:
        requests.HTTPError: if the upload responds with a 4xx/5xx status.
        requests.RequestException: on network errors or timeouts.
    """
    content = json.dumps(cache, ensure_ascii=False, indent=2)
    encoded = base64.b64encode(content.encode("utf-8")).decode("ascii")

    # Look up the current file's oid so the upload can reference it.
    sha = None
    r = requests.get(
        f"https://huggingface.co/api/datasets/{HF_DATASET}/tree/main",
        headers=hf_headers(), timeout=10
    )
    if r.status_code == 200:
        listing = r.json()
        if isinstance(listing, list):
            sha = next(
                (
                    f.get("oid")
                    for f in listing
                    if isinstance(f, dict) and f.get("path") == "cache.json"
                ),
                None,
            )

    payload = {
        "message": f"surfgo cache — {datetime.now(timezone.utc).strftime('%Y-%m-%d %H:%M UTC')}",
        "content": encoded,
    }
    if sha:
        payload["sha"] = sha

    # NOTE(review): endpoint and payload shape are kept as found; confirm them
    # against the current Hugging Face Hub HTTP API.
    resp = requests.put(
        f"https://huggingface.co/api/datasets/{HF_DATASET}/upload/main/cache.json",
        headers=hf_headers(), json=payload, timeout=15
    )
    # Surface failed uploads instead of silently dropping the write.
    resp.raise_for_status()
def normalize(q: str) -> str:
    """Return *q* with surrounding whitespace removed and lower-cased."""
    trimmed = q.strip()
    return trimmed.lower()
# ─── Routes ──────────────────────────────────────────────────────────────────
@app.route('/')
def index():
    """Serve index.html from the '.' directory at the site root."""
    return send_from_directory('.', 'index.html')
# NOTE(review): the route path is inferred from the function name; confirm it
# matches the URL the front end actually calls.
@app.route('/search', methods=['POST'])
def search():
    """Proxy a search request to Tavily, with an optional HF-dataset cache.

    Expects a JSON object body containing a non-empty string ``query``. All
    body fields are forwarded to Tavily together with the API key.

    When HF_TOKEN and HF_DATASET are both set, results are cached in the
    dataset keyed by the normalized query. The key ignores every other
    request field, so requests that differ only in other options share one
    cache entry.

    Returns:
        A cache hit returns the stored result plus ``_cached``, ``_hits`` and
        ``_stored_at``. A miss returns Tavily's JSON with Tavily's status code.
        Errors: 500 if TAVILY_KEY is unset, 400 for a missing or invalid
        body/query, 502 if the Tavily request fails or returns non-JSON.
    """
    if not TAVILY_KEY:
        return jsonify({"error": "TAVILY_KEY not set"}), 500

    # silent=True: a missing or malformed JSON body yields None, not an exception.
    body = request.get_json(silent=True)
    if not isinstance(body, dict):
        return jsonify({"error": "request body must be a JSON object"}), 400
    raw_query = body.get("query", "")
    if not isinstance(raw_query, str) or not raw_query.strip():
        return jsonify({"error": "query is required"}), 400

    query = raw_query.strip()
    key = normalize(query)
    use_hf = bool(HF_TOKEN and HF_DATASET)

    # ── 1. Check cache first ──────────────────────────────────────────────────
    if use_hf:
        try:
            cache = hf_get_cache()
            entry = cache.get(key)
            if entry is not None:
                # Bump the hit counter; a failed write must not fail the request.
                hits = entry.get("hits", 0) + 1
                entry["hits"] = hits
                try:
                    hf_put_cache(cache)
                except Exception as e:
                    print(f"[cache hit-count write error] {e}")
                # Copy so the response-only fields never leak into the cache entry.
                result = dict(entry["result"])
                result["_cached"] = True
                result["_hits"] = hits
                result["_stored_at"] = entry.get("stored_at", "")
                return jsonify(result)
        except Exception as e:
            # Any cache problem degrades to a normal upstream search.
            print(f"[cache read error] {e}")

    # ── 2. Cache miss: call Tavily ────────────────────────────────────────────
    payload = {**body, "api_key": TAVILY_KEY}
    try:
        resp = requests.post("https://api.tavily.com/search", json=payload, timeout=20)
        data = resp.json()
    except (requests.RequestException, ValueError) as e:
        print(f"[tavily error] {e}")
        return jsonify({"error": "search upstream request failed"}), 502

    # ── 3. Store new result in HF ─────────────────────────────────────────────
    if use_hf and resp.status_code == 200:
        try:
            # Read-modify-write of the whole cache file; not atomic across
            # concurrent requests.
            cache = hf_get_cache()
            cache[key] = {
                "query": query,
                "result": data,
                "hits": 1,
                "stored_at": datetime.now(timezone.utc).isoformat(),
            }
            hf_put_cache(cache)
        except Exception as e:
            print(f"[cache write error] {e}")

    return jsonify(data), resp.status_code
# NOTE(review): the route path is inferred from the function name; confirm it.
@app.route('/debug')
def debug():
    """Report configuration and HF connectivity diagnostics as JSON.

    Always reports whether TAVILY_KEY and HF_TOKEN are set, and the
    HF_DATASET value. When both HF settings are present it also tries to read
    the cache (reporting the key count and up to 20 keys) and to list the
    dataset's file tree; each probe records its own error text on failure.
    """
    info = {
        "TAVILY_KEY_set": bool(TAVILY_KEY),
        "HF_TOKEN_set": bool(HF_TOKEN),
        "HF_DATASET": HF_DATASET or "NOT SET",
    }

    if not (HF_TOKEN and HF_DATASET):
        info["hf_status"] = "HF_TOKEN or HF_DATASET not set — cache disabled"
        return jsonify(info)

    # Probe 1: can the cache file be read?
    try:
        cache = hf_get_cache()
        info["hf_read"] = "OK"
        info["cache_keys_count"] = len(cache)
        info["cache_keys"] = list(cache.keys())[:20]
    except Exception as e:
        info["hf_read"] = f"ERROR: {e}"

    # Probe 2: can the dataset's file tree be listed?
    try:
        r = requests.get(
            f"https://huggingface.co/api/datasets/{HF_DATASET}/tree/main",
            headers=hf_headers(), timeout=10
        )
        info["hf_tree_status"] = r.status_code
        info["hf_files"] = [f.get("path") for f in r.json()] if r.ok else r.text[:200]
    except Exception as e:
        info["hf_tree"] = f"ERROR: {e}"

    return jsonify(info)
@app.route('/<path:path>')
def static_files(path):
    """Serve the file at *path* from the '.' directory.

    NOTE(review): this exposes every file under that directory, including
    the application source; confirm that is intended.
    """
    return send_from_directory('.', path)
if __name__ == '__main__':
    # Bind to all interfaces; the port comes from $PORT, defaulting to 7860.
    app.run(host='0.0.0.0', port=int(os.environ.get('PORT', 7860)), debug=False)