# Scraped from a Hugging Face Spaces file view (Space status: Sleeping; file size: 5,510 bytes).
from flask import Flask, send_from_directory, request, jsonify
import requests
import os
import json
import base64
from datetime import datetime, timezone
# Flask application object; its static folder is configured as '.'.
app = Flask(__name__, static_folder='.')

# Settings are read from the environment once at import time.
# An empty string means "not configured".
TAVILY_KEY = os.getenv("TAVILY_KEY", "")
HF_TOKEN = os.getenv("HF_TOKEN", "")
HF_DATASET = os.getenv("HF_DATASET", "")  # e.g. "AnesNT/surfgo-cache"
# ─── HuggingFace Dataset helpers ────────────────────────────────────────────
def hf_headers():
    """Build the headers used for JSON requests to the Hugging Face API.

    Returns:
        dict: A bearer ``Authorization`` header built from ``HF_TOKEN`` and a
        JSON ``Content-Type`` header.
    """
    headers = {}
    headers["Authorization"] = f"Bearer {HF_TOKEN}"
    headers["Content-Type"] = "application/json"
    return headers
def hf_get_cache():
    """Download ``cache.json`` from the configured Hugging Face dataset.

    Returns:
        dict: The parsed cache. An empty dict is returned only when the file
        does not exist yet (HTTP 404).

    Raises:
        requests.HTTPError: For any other 4xx/5xx response. Treating those as
            an empty cache would let a later write replace the stored cache
            with a nearly empty one.
        ValueError: If the response body is not valid JSON or is not a JSON
            object.
    """
    url = f"https://huggingface.co/datasets/{HF_DATASET}/resolve/main/cache.json"
    r = requests.get(url, headers={"Authorization": f"Bearer {HF_TOKEN}"}, timeout=10)

    # A missing file is the only situation that legitimately means "no cache yet".
    if r.status_code == 404:
        return {}

    # Auth failures, rate limits and server errors must surface to the caller.
    r.raise_for_status()

    cache = r.json()
    if not isinstance(cache, dict):
        raise ValueError(
            f"cache.json must contain a JSON object, got {type(cache).__name__}"
        )
    return cache
def hf_put_cache(cache: dict):
    """Upload ``cache`` as ``cache.json`` to the configured Hugging Face dataset.

    The cache is serialised to indented JSON, base64-encoded and sent in a PUT
    request. If the repository tree listing reports an existing ``cache.json``,
    its ``oid`` is included in the payload as ``sha``.

    Args:
        cache: The full cache mapping to store.

    Raises:
        requests.HTTPError: If the upload request returns a 4xx/5xx status.
        requests.RequestException: On network failures or timeouts.
    """
    content = json.dumps(cache, ensure_ascii=False, indent=2)
    encoded = base64.b64encode(content.encode()).decode()

    # Look up the identifier of the current file, if there is one.
    sha = None
    r = requests.get(
        f"https://huggingface.co/api/datasets/{HF_DATASET}/tree/main",
        headers=hf_headers(), timeout=10
    )
    if r.status_code == 200:
        sha = next(
            (f.get("oid") for f in r.json() if f.get("path") == "cache.json"),
            None,
        )

    payload = {
        "message": f"surfgo cache β {datetime.now(timezone.utc).strftime('%Y-%m-%d %H:%M UTC')}",
        "content": encoded,
    }
    if sha:
        payload["sha"] = sha

    # NOTE(review): this request shape (PUT with a base64 JSON body and "sha")
    # has not been verified against the Hub HTTP API here - confirm that the
    # endpoint accepts it.
    resp = requests.put(
        f"https://huggingface.co/api/datasets/{HF_DATASET}/upload/main/cache.json",
        headers=hf_headers(), json=payload, timeout=15
    )
    # Without this check a rejected upload would go unnoticed by callers.
    resp.raise_for_status()
def normalize(q: str) -> str:
    """Return ``q`` with surrounding whitespace removed and letters lower-cased."""
    trimmed = q.strip()
    return trimmed.lower()
# ─── Routes ─────────────────────────────────────────────────────────────────
@app.route('/')
def index():
    """Serve ``index.html`` from the ``'.'`` directory for the root URL."""
    landing_page = 'index.html'
    return send_from_directory('.', landing_page)
@app.route('/search', methods=['POST'])
def search():
    """Proxy a search request to Tavily, using the Hugging Face cache if enabled.

    Expects a JSON object body with a non-empty string ``"query"``. All fields
    of the body are forwarded to Tavily together with the API key.

    When ``HF_TOKEN`` and ``HF_DATASET`` are both set, the cache is consulted
    first. On a hit, the hit counter is bumped (best effort) and the stored
    result is returned with ``_cached``, ``_hits`` and ``_stored_at`` added.
    On a miss, a successful (HTTP 200) Tavily response is stored in the cache.

    Returns:
        A JSON response. Status 500 if ``TAVILY_KEY`` is not set, 400 for an
        invalid body or empty query, 502 if Tavily cannot be reached or
        returns a non-JSON body; otherwise the cached result or Tavily's own
        body and status code.
    """
    if not TAVILY_KEY:
        return jsonify({"error": "TAVILY_KEY not set"}), 500

    # silent=True yields None for a missing or malformed JSON body instead of
    # raising, so the request can be rejected with a uniform JSON error.
    body = request.get_json(silent=True)
    if not isinstance(body, dict):
        return jsonify({"error": "request body must be a JSON object"}), 400

    raw_query = body.get("query", "")
    if not isinstance(raw_query, str) or not raw_query.strip():
        return jsonify({"error": "query must be a non-empty string"}), 400

    query = raw_query.strip()
    key = normalize(query)
    use_hf = bool(HF_TOKEN and HF_DATASET)

    # -- 1. Check the cache first --------------------------------------------
    if use_hf:
        try:
            cache = hf_get_cache()
            if key in cache:
                entry = cache[key]
                # Bump the hit counter; failing to persist it must not fail the request.
                entry["hits"] = entry.get("hits", 0) + 1
                try:
                    hf_put_cache(cache)
                except Exception as e:
                    print(f"[cache hit-count update error] {e}")
                # Annotations are added after the upload so they are not stored.
                result = entry["result"]
                result["_cached"] = True
                result["_hits"] = entry["hits"]
                result["_stored_at"] = entry.get("stored_at", "")
                return jsonify(result)
        except Exception as e:
            # Any cache problem falls through to a live search.
            print(f"[cache read error] {e}")

    # -- 2. Cache miss: call Tavily ------------------------------------------
    payload = {**body, "api_key": TAVILY_KEY}
    try:
        resp = requests.post("https://api.tavily.com/search", json=payload, timeout=20)
        data = resp.json()
    except (requests.RequestException, ValueError) as e:
        # Network failure, timeout, or a body that is not JSON.
        print(f"[tavily error] {e}")
        return jsonify({"error": "search backend request failed"}), 502

    # -- 3. Store the new result in the cache --------------------------------
    if use_hf and resp.status_code == 200:
        try:
            cache = hf_get_cache()
            cache[key] = {
                "query": query,
                "result": data,
                "hits": 1,
                "stored_at": datetime.now(timezone.utc).isoformat(),
            }
            hf_put_cache(cache)
        except Exception as e:
            print(f"[cache write error] {e}")

    return jsonify(data), resp.status_code
@app.route('/debug')
def debug():
    """Report configuration and Hugging Face connectivity as JSON.

    The response always states whether ``TAVILY_KEY`` and ``HF_TOKEN`` are set
    and echoes ``HF_DATASET``. When both HF settings are present it also tries
    to read the cache (reporting the key count and up to 20 keys) and to list
    the dataset's file tree. Errors from either step are reported in the
    payload instead of being raised.
    """
    info = dict(
        TAVILY_KEY_set=bool(TAVILY_KEY),
        HF_TOKEN_set=bool(HF_TOKEN),
        HF_DATASET=HF_DATASET or "NOT SET",
    )

    # Without both HF settings there is nothing further to probe.
    if not (HF_TOKEN and HF_DATASET):
        info["hf_status"] = "HF_TOKEN or HF_DATASET not set β cache disabled"
        return jsonify(info)

    # Probe 1: read the cache file.
    try:
        cached = hf_get_cache()
        info["hf_read"] = "OK"
        info["cache_keys_count"] = len(cached)
        info["cache_keys"] = list(cached.keys())[:20]
    except Exception as err:
        info["hf_read"] = f"ERROR: {err}"

    # Probe 2: list the files in the dataset repository.
    try:
        tree_resp = requests.get(
            f"https://huggingface.co/api/datasets/{HF_DATASET}/tree/main",
            headers=hf_headers(), timeout=10
        )
        info["hf_tree_status"] = tree_resp.status_code
        if tree_resp.ok:
            info["hf_files"] = [item.get("path") for item in tree_resp.json()]
        else:
            # Include only the start of the error body.
            info["hf_files"] = tree_resp.text[:200]
    except Exception as err:
        info["hf_tree"] = f"ERROR: {err}"

    return jsonify(info)
@app.route('/<path:path>')
def static_files(path):
    """Serve the file at ``path`` from the ``'.'`` directory."""
    base_dir = '.'
    return send_from_directory(base_dir, path)
if __name__ == '__main__':
    # Listen on the port given by the PORT environment variable, else 7860.
    port = int(os.environ.get('PORT', 7860))
    # Bind to all interfaces with the Flask debugger disabled.
    app.run(host='0.0.0.0', port=port, debug=False)