"""Flask front-end for Tavily search with an optional HuggingFace-dataset cache.

Search results are stored in a single ``cache.json`` file inside the dataset
named by ``HF_DATASET``. Caching is enabled only when both ``HF_TOKEN`` and
``HF_DATASET`` are set in the environment.
"""
import base64
import json
import os
from datetime import datetime, timezone

import requests
from flask import Flask, send_from_directory, request, jsonify

app = Flask(__name__, static_folder='.')

TAVILY_KEY = os.environ.get("TAVILY_KEY", "")
HF_TOKEN = os.environ.get("HF_TOKEN", "")
HF_DATASET = os.environ.get("HF_DATASET", "")  # e.g. "AnesNT/surfgo-cache"


# ─── HuggingFace Dataset helpers ────────────────────────────────────────────

def hf_headers():
    """Return the JSON request headers carrying the HF bearer token."""
    return {"Authorization": f"Bearer {HF_TOKEN}", "Content-Type": "application/json"}


def hf_get_cache():
    """Download cache.json from the HF dataset and return it as a dict.

    Returns:
        The parsed cache, or an empty dict when cache.json does not exist yet
        (HTTP 404).

    Raises:
        requests.HTTPError: for any other non-success status.
        ValueError: if the file is not valid JSON or is not a JSON object.
    """
    url = f"https://huggingface.co/datasets/{HF_DATASET}/resolve/main/cache.json"
    r = requests.get(url, headers={"Authorization": f"Bearer {HF_TOKEN}"}, timeout=10)
    if r.status_code == 404:
        return {}
    # Any other failure must NOT look like an empty cache: callers write back
    # whatever this returns, so a transient error would wipe the stored file.
    r.raise_for_status()
    cache = r.json()
    if not isinstance(cache, dict):
        raise ValueError("cache.json does not contain a JSON object")
    return cache


def hf_put_cache(cache: dict):
    """Upload the updated cache.json to the HF dataset.

    Args:
        cache: The full cache mapping to serialize and upload.

    Raises:
        requests.HTTPError: if the upload request is rejected.
    """
    content = json.dumps(cache, ensure_ascii=False, indent=2)
    encoded = base64.b64encode(content.encode()).decode()

    # Look up the current file's object id so it can be sent with the update.
    sha = None
    r = requests.get(
        f"https://huggingface.co/api/datasets/{HF_DATASET}/tree/main",
        headers=hf_headers(),
        timeout=10,
    )
    if r.status_code == 200:
        sha = next(
            (f.get("oid") for f in r.json() if f.get("path") == "cache.json"),
            None,
        )

    payload = {
        "message": f"surfgo cache — {datetime.now(timezone.utc).strftime('%Y-%m-%d %H:%M UTC')}",
        "content": encoded,
    }
    if sha:
        payload["sha"] = sha

    # NOTE(review): this endpoint and payload shape (base64 "content" + "sha")
    # should be confirmed against the current HF Hub HTTP API — TODO verify.
    resp = requests.put(
        f"https://huggingface.co/api/datasets/{HF_DATASET}/upload/main/cache.json",
        headers=hf_headers(),
        json=payload,
        timeout=15,
    )
    # Surface rejected uploads; every caller wraps this call in try/except.
    resp.raise_for_status()


def normalize(q: str) -> str:
    """Return the cache key for a query: trimmed and lower-cased."""
    return q.strip().lower()


# ─── Routes ─────────────────────────────────────────────────────────────────

@app.route('/')
def index():
    """Serve the single-page front-end."""
    return send_from_directory('.', 'index.html')


@app.route('/search', methods=['POST'])
def search():
    """Proxy a search request to Tavily, consulting the HF cache first.

    Expects a JSON object body with a non-empty string ``query``; every other
    field is forwarded to Tavily unchanged. The cache is keyed on the
    normalized query only.

    Returns:
        The Tavily response JSON (with ``_cached``, ``_hits`` and
        ``_stored_at`` added when served from cache). Responds 400 for a
        malformed request, 500 when TAVILY_KEY is missing, and 502 when the
        Tavily call fails or returns non-JSON.
    """
    if not TAVILY_KEY:
        return jsonify({"error": "TAVILY_KEY not set"}), 500

    body = request.get_json(silent=True)
    if not isinstance(body, dict):
        return jsonify({"error": "request body must be a JSON object"}), 400

    query = body.get("query", "")
    if not isinstance(query, str) or not query.strip():
        return jsonify({"error": "query must be a non-empty string"}), 400
    query = query.strip()
    key = normalize(query)

    use_hf = bool(HF_TOKEN and HF_DATASET)

    # ── 1. Check cache first ─────────────────────────────────────────────────
    if use_hf:
        try:
            cache = hf_get_cache()
            entry = cache.get(key)
            if entry is not None:
                # Bump hit counter; persisting it is best-effort only.
                hits = entry.get("hits", 0) + 1
                entry["hits"] = hits
                try:
                    hf_put_cache(cache)
                except Exception as e:
                    print(f"[cache hit-count error] {e}")
                # Copy so the response decorations never leak into the entry.
                result = dict(entry["result"])
                result["_cached"] = True
                result["_hits"] = hits
                result["_stored_at"] = entry.get("stored_at", "")
                return jsonify(result)
        except Exception as e:
            # Any cache problem degrades to a normal (uncached) search.
            print(f"[cache read error] {e}")

    # ── 2. Cache miss → call Tavily ──────────────────────────────────────────
    payload = {**body, "api_key": TAVILY_KEY}
    try:
        resp = requests.post("https://api.tavily.com/search", json=payload, timeout=20)
        data = resp.json()
    except (requests.RequestException, ValueError) as e:
        print(f"[tavily error] {e}")
        return jsonify({"error": "upstream search request failed"}), 502

    # ── 3. Store new result in HF ────────────────────────────────────────────
    if use_hf and resp.status_code == 200:
        try:
            # Re-read so the write is based on the latest stored file.
            cache = hf_get_cache()
            cache[key] = {
                "query": query,
                "result": data,
                "hits": 1,
                "stored_at": datetime.now(timezone.utc).isoformat(),
            }
            hf_put_cache(cache)
        except Exception as e:
            print(f"[cache write error] {e}")

    return jsonify(data), resp.status_code


@app.route('/debug')
def debug():
    """Report which settings are present and whether the HF dataset is reachable."""
    info = {
        "TAVILY_KEY_set": bool(TAVILY_KEY),
        "HF_TOKEN_set": bool(HF_TOKEN),
        "HF_DATASET": HF_DATASET or "NOT SET",
    }
    if HF_TOKEN and HF_DATASET:
        try:
            cache = hf_get_cache()
            info["hf_read"] = "OK"
            info["cache_keys_count"] = len(cache)
            info["cache_keys"] = list(cache.keys())[:20]
        except Exception as e:
            info["hf_read"] = f"ERROR: {e}"
        try:
            r = requests.get(
                f"https://huggingface.co/api/datasets/{HF_DATASET}/tree/main",
                headers=hf_headers(),
                timeout=10,
            )
            info["hf_tree_status"] = r.status_code
            info["hf_files"] = [f.get("path") for f in r.json()] if r.ok else r.text[:200]
        except Exception as e:
            info["hf_tree"] = f"ERROR: {e}"
    else:
        info["hf_status"] = "HF_TOKEN or HF_DATASET not set — cache disabled"
    return jsonify(info)


# The rule needs a <path:path> variable: the view takes a `path` argument, and
# a second bare '/' rule would only duplicate `index`.
# NOTE(review): the directory argument is '.', the same one index.html is
# served from — confirm nothing sensitive is stored alongside it.
@app.route('/<path:path>')
def static_files(path):
    """Serve any other requested file via send_from_directory('.', path)."""
    return send_from_directory('.', path)


if __name__ == '__main__':
    port = int(os.environ.get('PORT', 7860))
    app.run(host='0.0.0.0', port=port, debug=False)