File size: 5,510 Bytes
d39e2e6
 
ed10f86
d39e2e6
 
 
8a2a935
ed10f86
9eb0671
d39e2e6
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
8a2a935
d83ed68
ed10f86
 
d39e2e6
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1586bc2
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
ed10f86
 
 
6747594
8a2a935
ed10f86
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
from flask import Flask, send_from_directory, request, jsonify
import requests
import os
import json
import base64
from datetime import datetime, timezone

# Flask application object; its static folder is set to '.'.
app = Flask(__name__, static_folder='.')

# Runtime configuration, read once at import time. Each value falls back to
# an empty string when its environment variable is not defined.
TAVILY_KEY = os.getenv("TAVILY_KEY", "")
HF_TOKEN = os.getenv("HF_TOKEN", "")
HF_DATASET = os.getenv("HF_DATASET", "")  # e.g. "AnesNT/surfgo-cache"

# ─── HuggingFace Dataset helpers ────────────────────────────────────────────

def hf_headers():
    """Build the request headers for JSON calls authenticated with HF_TOKEN."""
    headers = {}
    headers["Authorization"] = f"Bearer {HF_TOKEN}"
    headers["Content-Type"] = "application/json"
    return headers

def hf_get_cache():
    """Download cache.json from the HF dataset and return it as a dict.

    Returns:
        The parsed cache mapping, or an empty dict when the file does not
        exist (HTTP 404).

    Raises:
        requests.HTTPError: for any other non-success status. Treating an
            auth or server error as "empty cache" would let a later write
            replace the whole stored cache with a single entry.
        ValueError: if the body is not valid JSON or is not a JSON object.
    """
    url = f"https://huggingface.co/datasets/{HF_DATASET}/resolve/main/cache.json"
    r = requests.get(url, headers={"Authorization": f"Bearer {HF_TOKEN}"}, timeout=10)

    # Only a missing file legitimately means "nothing cached yet".
    if r.status_code == 404:
        return {}
    r.raise_for_status()

    data = r.json()
    if not isinstance(data, dict):
        raise ValueError(
            f"cache.json must contain a JSON object, got {type(data).__name__}"
        )
    return data

def hf_put_cache(cache: dict):
    """Upload the given cache mapping as cache.json to the HF dataset.

    Args:
        cache: The full cache mapping; it is serialized to JSON and sent
            base64-encoded.

    Raises:
        requests.HTTPError: if the upload request is answered with an error
            status, so callers can tell a failed write from a successful one.
        requests.RequestException: on network failures or timeouts.
    """
    content = json.dumps(cache, ensure_ascii=False, indent=2)
    encoded = base64.b64encode(content.encode()).decode()

    # Look up the current oid of cache.json so it can be sent along with the
    # update. A failed listing is tolerated: the upload is then attempted
    # without a sha.
    sha = None
    r = requests.get(
        f"https://huggingface.co/api/datasets/{HF_DATASET}/tree/main",
        headers=hf_headers(), timeout=10
    )
    if r.status_code == 200:
        sha = next(
            (f.get("oid") for f in r.json() if f.get("path") == "cache.json"),
            None,
        )

    timestamp = datetime.now(timezone.utc).strftime('%Y-%m-%d %H:%M UTC')
    payload = {
        "message": f"surfgo cache — {timestamp}",
        "content": encoded,
    }
    if sha:
        payload["sha"] = sha

    # NOTE(review): confirm against the Hub API docs that this endpoint
    # accepts a PUT with a base64 JSON payload in this shape.
    resp = requests.put(
        f"https://huggingface.co/api/datasets/{HF_DATASET}/upload/main/cache.json",
        headers=hf_headers(), json=payload, timeout=15
    )
    # Surface rejected uploads instead of silently dropping the write.
    resp.raise_for_status()

def normalize(q: str) -> str:
    """Return *q* with surrounding whitespace removed and lowercased."""
    trimmed = q.strip()
    return trimmed.lower()

# ─── Routes ─────────────────────────────────────────────────────────────────

@app.route('/')
def index():
    """Serve index.html from the '.' directory for the root URL."""
    landing_page = 'index.html'
    return send_from_directory('.', landing_page)

@app.route('/search', methods=['POST'])
def search():
    """Proxy a search request to Tavily, using the HF dataset as a cache.

    Expects a JSON object body with a non-empty string ``query``. All fields
    of the body are forwarded to Tavily with ``api_key`` set to TAVILY_KEY.
    The cache key is the normalized query only; other body fields are not
    part of the key.

    Returns:
        * 500 if TAVILY_KEY is not configured.
        * 400 if the body is not a JSON object or ``query`` is missing/empty.
        * 200 with the cached result (plus ``_cached``, ``_hits`` and
          ``_stored_at``) on a cache hit.
        * 502 if Tavily cannot be reached or does not answer with JSON.
        * Otherwise Tavily's JSON body and status code.
    """
    if not TAVILY_KEY:
        return jsonify({"error": "TAVILY_KEY not set"}), 500

    # silent=True returns None for a missing or malformed JSON body instead
    # of raising, so bad input is answered with a clean 400 below.
    body = request.get_json(silent=True)
    if not isinstance(body, dict):
        return jsonify({"error": "request body must be a JSON object"}), 400

    raw_query = body.get("query", "")
    if not isinstance(raw_query, str) or not raw_query.strip():
        return jsonify({"error": "'query' must be a non-empty string"}), 400

    query = raw_query.strip()
    key = normalize(query)
    use_hf = bool(HF_TOKEN and HF_DATASET)

    # ── 1. Check cache first ─────────────────────────────────────────────────
    if use_hf:
        try:
            cache = hf_get_cache()
            entry = cache.get(key)
            if entry is not None:
                # Read the payload before touching the counter so a malformed
                # entry is treated as a miss without being written back.
                result = entry["result"]
                hits = entry.get("hits", 0) + 1
                entry["hits"] = hits
                # Bumping the counter is best-effort: a failed write must not
                # prevent serving the cached result.
                try:
                    hf_put_cache(cache)
                except Exception as e:
                    print(f"[cache hit-count write error] {e}")
                # Copy so the response-only marker fields never leak into
                # the cache structure.
                return jsonify({
                    **result,
                    "_cached": True,
                    "_hits": hits,
                    "_stored_at": entry.get("stored_at", ""),
                })
        except Exception as e:
            # Any cache problem degrades to a normal upstream lookup.
            print(f"[cache read error] {e}")

    # ── 2. Cache miss → call Tavily ──────────────────────────────────────────
    # api_key is placed last so a client-supplied value cannot override it.
    payload = {**body, "api_key": TAVILY_KEY}
    try:
        resp = requests.post("https://api.tavily.com/search", json=payload, timeout=20)
        data = resp.json()
    except (requests.RequestException, ValueError) as e:
        print(f"[tavily error] {e}")
        return jsonify({"error": "upstream search request failed"}), 502

    # ── 3. Store new result in HF ────────────────────────────────────────────
    if use_hf and resp.status_code == 200:
        try:
            # Re-read the cache so entries written while the upstream call
            # was in flight are kept.
            cache = hf_get_cache()
            cache[key] = {
                "query":     query,
                "result":    data,
                "hits":      1,
                "stored_at": datetime.now(timezone.utc).isoformat(),
            }
            hf_put_cache(cache)
        except Exception as e:
            print(f"[cache write error] {e}")

    return jsonify(data), resp.status_code

@app.route('/debug')
def debug():
    """Report configuration and HF cache connectivity as JSON.

    The response always states whether TAVILY_KEY and HF_TOKEN are set and
    which dataset is configured. When both HF settings are present it also
    tries to read the cache (reporting the key count and up to 20 keys) and
    to list the dataset's files; each probe records its own error text
    instead of failing the request.
    """
    info = {
        "TAVILY_KEY_set": bool(TAVILY_KEY),
        "HF_TOKEN_set":   bool(HF_TOKEN),
        "HF_DATASET":     HF_DATASET or "NOT SET",
    }
    if HF_TOKEN and HF_DATASET:
        # Probe 1: can cache.json be downloaded and parsed?
        try:
            cache = hf_get_cache()
            info["hf_read"]          = "OK"
            info["cache_keys_count"] = len(cache)
            info["cache_keys"]       = list(cache.keys())[:20]
        except Exception as e:
            info["hf_read"] = f"ERROR: {e}"
        # Probe 2: can the dataset's file tree be listed?
        try:
            r = requests.get(
                f"https://huggingface.co/api/datasets/{HF_DATASET}/tree/main",
                headers=hf_headers(), timeout=10
            )
            info["hf_tree_status"] = r.status_code
            # On failure, include the start of the error body for diagnosis.
            info["hf_files"] = [f.get("path") for f in r.json()] if r.ok else r.text[:200]
        except Exception as e:
            info["hf_tree"] = f"ERROR: {e}"
    else:
        info["hf_status"] = "HF_TOKEN or HF_DATASET not set — cache disabled"
    return jsonify(info)

@app.route('/<path:path>')
def static_files(path):
    """Serve a static asset from the '.' directory.

    The path comes straight from the request URL, so hidden files and
    directories (any segment starting with '.', such as ``.env`` or
    ``.git/``) and Python source or bytecode files are answered with 404
    instead of being served.

    Args:
        path: URL path of the requested file, relative to '.'.
    """
    segments = path.split('/')
    is_hidden = any(segment.startswith('.') for segment in segments)
    is_python_source = path.lower().endswith(('.py', '.pyc'))
    if is_hidden or is_python_source:
        return jsonify({"error": "not found"}), 404
    return send_from_directory('.', path)

if __name__ == '__main__':
    # Listen on all interfaces; the port is taken from the PORT environment
    # variable and defaults to 7860.
    listen_port = int(os.getenv('PORT', 7860))
    app.run(host='0.0.0.0', port=listen_port, debug=False)