SurfGO-engine / app.py
AnesKAM's picture
Update app.py
1586bc2 verified
from flask import Flask, send_from_directory, request, jsonify
import requests
import os
import json
import base64
from datetime import datetime, timezone
# Flask application object; '.' is passed as the static-file folder.
app = Flask(__name__, static_folder='.')

# Runtime configuration read from the environment. Each value falls back to
# an empty string when the variable is unset.
TAVILY_KEY = os.getenv("TAVILY_KEY", "")
HF_TOKEN = os.getenv("HF_TOKEN", "")
HF_DATASET = os.getenv("HF_DATASET", "")  # e.g. "AnesNT/surfgo-cache"
# ─── HuggingFace Dataset helpers ────────────────────────────────────────────
def hf_headers():
    """Build the headers used for JSON requests to the HuggingFace API.

    Returns:
        A fresh dict holding a bearer ``Authorization`` header built from the
        module-level ``HF_TOKEN`` and a JSON ``Content-Type`` header.
    """
    headers = {}
    headers["Authorization"] = "Bearer {}".format(HF_TOKEN)
    headers["Content-Type"] = "application/json"
    return headers
def hf_get_cache():
    """Download cache.json from the HF dataset and return it as a dict.

    Returns:
        The parsed cache mapping. An empty dict is returned only when the
        server answers 404, i.e. cache.json has not been created yet.

    Raises:
        requests.HTTPError: for any other error status (auth failure, rate
            limit, server error, ...).
        ValueError: if the body is not valid JSON or is not a JSON object.
    """
    url = f"https://huggingface.co/datasets/{HF_DATASET}/resolve/main/cache.json"
    r = requests.get(url, headers={"Authorization": f"Bearer {HF_TOKEN}"}, timeout=10)

    # Only "file not found" means "empty cache". Treating every failure as an
    # empty cache would let a caller write {} + one entry back and wipe all
    # previously stored entries after a transient error.
    if r.status_code == 404:
        return {}
    r.raise_for_status()

    cache = r.json()
    if not isinstance(cache, dict):
        raise ValueError("cache.json does not contain a JSON object")
    return cache
def hf_put_cache(cache: dict):
    """Upload the given cache as cache.json to the HF dataset.

    The cache is serialised to indented JSON, base64-encoded and sent with a
    timestamped commit message. If the dataset's tree listing already
    contains cache.json, its ``oid`` is included in the payload as ``sha``.

    Args:
        cache: The full cache mapping to store.

    Raises:
        requests.HTTPError: if the upload request returns an error status.
        requests.RequestException: on network failure or timeout.

    NOTE(review): the upload endpoint and payload shape (base64 "content"
    plus "sha") are kept exactly as originally written; confirm them against
    the current Hugging Face Hub HTTP API.
    """
    content = json.dumps(cache, ensure_ascii=False, indent=2)
    encoded = base64.b64encode(content.encode("utf-8")).decode("ascii")

    # Look up the current blob id of cache.json (if the file exists) so the
    # request can be sent as an update of that file.
    sha = None
    tree = requests.get(
        f"https://huggingface.co/api/datasets/{HF_DATASET}/tree/main",
        headers=hf_headers(), timeout=10
    )
    if tree.status_code == 200:
        sha = next(
            (item.get("oid") for item in tree.json() if item.get("path") == "cache.json"),
            None,
        )

    payload = {
        "message": f"surfgo cache — {datetime.now(timezone.utc).strftime('%Y-%m-%d %H:%M UTC')}",
        "content": encoded,
    }
    if sha:
        payload["sha"] = sha

    resp = requests.put(
        f"https://huggingface.co/api/datasets/{HF_DATASET}/upload/main/cache.json",
        headers=hf_headers(), json=payload, timeout=15
    )
    # Surface failed uploads instead of silently dropping them; callers wrap
    # this function in try/except and log the error.
    resp.raise_for_status()
def normalize(q: str) -> str:
    """Return *q* with surrounding whitespace removed and converted to lower case."""
    trimmed = q.strip()
    return trimmed.lower()
# ─── Routes ─────────────────────────────────────────────────────────────────
@app.route('/')
def index():
    """Serve index.html from the '.' directory for the root URL."""
    landing_page = 'index.html'
    return send_from_directory('.', landing_page)
@app.route('/search', methods=['POST'])
def search():
    """Proxy a search request to Tavily, with an optional HF-dataset cache.

    The request body must be a JSON object with a non-empty string "query".
    When both HF_TOKEN and HF_DATASET are set, the normalized query is looked
    up in the cache first; a hit is returned with extra "_cached", "_hits"
    and "_stored_at" fields. On a miss the whole body (plus the API key) is
    forwarded to Tavily and a 200 response is stored in the cache.

    Returns:
        A JSON response: the cached or fresh search result, or an
        ``{"error": ...}`` object with status 400 (bad request body),
        500 (TAVILY_KEY missing) or 502 (Tavily unreachable / non-JSON reply).
        Otherwise Tavily's own status code is passed through.

    NOTE(review): the cache key is derived from the query text only, so
    requests that differ in other body fields share one cache entry.
    """
    if not TAVILY_KEY:
        return jsonify({"error": "TAVILY_KEY not set"}), 500

    # silent=True yields None for a missing or malformed JSON body, which is
    # answered with a clean 400 instead of crashing on body.get below.
    body = request.get_json(silent=True)
    if not isinstance(body, dict):
        return jsonify({"error": "request body must be a JSON object"}), 400

    raw_query = body.get("query", "")
    if not isinstance(raw_query, str) or not raw_query.strip():
        return jsonify({"error": "query must be a non-empty string"}), 400

    query = raw_query.strip()
    key = normalize(query)
    use_hf = bool(HF_TOKEN and HF_DATASET)

    # ── 1. Check cache first ────────────────────────────────────────────────
    if use_hf:
        try:
            cache = hf_get_cache()
            entry = cache.get(key)
            if entry is not None:
                # Bump hit counter; persisting it is best-effort, but a
                # failure is logged rather than silently discarded.
                hits = entry.get("hits", 0) + 1
                entry["hits"] = hits
                try:
                    hf_put_cache(cache)
                except Exception as e:
                    print(f"[cache hit update error] {e}")

                # Copy so the response-only fields never end up inside the
                # cached entry itself.
                result = dict(entry["result"])
                result["_cached"] = True
                result["_hits"] = hits
                result["_stored_at"] = entry.get("stored_at", "")
                return jsonify(result)
        except Exception as e:
            # Any cache problem degrades to a normal (uncached) search.
            print(f"[cache read error] {e}")

    # ── 2. Cache miss → call Tavily ─────────────────────────────────────────
    # "api_key" is placed last so a client-supplied value cannot override it.
    payload = {**body, "api_key": TAVILY_KEY}
    try:
        resp = requests.post("https://api.tavily.com/search", json=payload, timeout=20)
    except requests.RequestException as e:
        print(f"[tavily request error] {e}")
        return jsonify({"error": "search backend unreachable"}), 502

    try:
        data = resp.json()
    except ValueError as e:
        print(f"[tavily response error] {e}")
        return jsonify({"error": "search backend returned a non-JSON response"}), 502

    # ── 3. Store new result in HF ───────────────────────────────────────────
    # Only dict results are cached: the cache-hit path adds keys to them.
    if use_hf and resp.status_code == 200 and isinstance(data, dict):
        try:
            # Re-read right before writing so entries stored meanwhile are kept.
            cache = hf_get_cache()
            cache[key] = {
                "query": query,
                "result": data,
                "hits": 1,
                "stored_at": datetime.now(timezone.utc).isoformat(),
            }
            hf_put_cache(cache)
        except Exception as e:
            print(f"[cache write error] {e}")

    return jsonify(data), resp.status_code
@app.route('/debug')
def debug():
    """Return a JSON report of configuration flags and HF connectivity.

    The report always states whether TAVILY_KEY and HF_TOKEN are set and
    which dataset is configured. When both HF settings are present it also
    tries to read the cache (key count plus up to 20 keys) and to list the
    dataset's file tree; failures are recorded in the report instead of
    being raised.
    """
    info = {}
    info["TAVILY_KEY_set"] = bool(TAVILY_KEY)
    info["HF_TOKEN_set"] = bool(HF_TOKEN)
    info["HF_DATASET"] = HF_DATASET if HF_DATASET else "NOT SET"

    # Without both HF settings there is nothing further to probe.
    if not (HF_TOKEN and HF_DATASET):
        info["hf_status"] = "HF_TOKEN or HF_DATASET not set β€” cache disabled"
        return jsonify(info)

    # Probe 1: can cache.json be read?
    try:
        cached = hf_get_cache()
        info["hf_read"] = "OK"
        info["cache_keys_count"] = len(cached)
        info["cache_keys"] = list(cached.keys())[:20]
    except Exception as err:
        info["hf_read"] = f"ERROR: {err}"

    # Probe 2: can the dataset's file tree be listed?
    try:
        tree_url = f"https://huggingface.co/api/datasets/{HF_DATASET}/tree/main"
        tree_resp = requests.get(tree_url, headers=hf_headers(), timeout=10)
        info["hf_tree_status"] = tree_resp.status_code
        if tree_resp.ok:
            info["hf_files"] = [item.get("path") for item in tree_resp.json()]
        else:
            # On failure keep only the start of the response body.
            info["hf_files"] = tree_resp.text[:200]
    except Exception as err:
        info["hf_tree"] = f"ERROR: {err}"

    return jsonify(info)
@app.route('/<path:path>')
def static_files(path):
    """Serve the requested file from the '.' directory.

    NOTE(review): this catch-all serves whatever file under '.' matches the
    URL path; confirm that exposing everything in that directory is intended.
    """
    base_dir = '.'
    return send_from_directory(base_dir, path)
if __name__ == '__main__':
    # Listen on all interfaces; the port comes from PORT and defaults to 7860.
    listen_port = int(os.environ.get('PORT', 7860))
    app.run(debug=False, port=listen_port, host='0.0.0.0')