jinruiyang Claude Opus 4.5 committed on
Commit
da9db52
·
1 Parent(s): 6287ed2

Add backend API module for FastAPI server

Browse files

- backend/__init__.py
- backend/api.py - FastAPI routes for search, feedback, translation
- backend/services.py - Service layer for IR operations

🤖 Generated with [Claude Code](https://claude.com/claude-code)

Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>

Files changed (3) hide show
  1. backend/__init__.py +6 -0
  2. backend/api.py +305 -0
  3. backend/services.py +133 -0
backend/__init__.py ADDED
@@ -0,0 +1,6 @@
 
 
 
 
 
 
 
1
+ """
2
+ UAE Knowledge System - Backend Package
3
+ """
4
+ from .api import app
5
+
6
+ __all__ = ["app"]
backend/api.py ADDED
@@ -0,0 +1,305 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """
2
+ UAE Knowledge System - FastAPI Backend
3
+ Serves the HTML frontend and provides search API
4
+ """
5
+ import json
6
+ import os
7
+ import httpx
8
+ from datetime import datetime
9
+ from pathlib import Path
10
+ from typing import Dict, List, Optional
11
+
12
+ from fastapi import FastAPI, Request
13
+ from fastapi.staticfiles import StaticFiles
14
+ from fastapi.responses import HTMLResponse, FileResponse
15
+ from pydantic import BaseModel
16
+
17
+ from .services import get_knowledge_base, get_retriever, search_knowledge_base, get_stats
18
+
19
+ # DeepL API configuration
20
+ DEEPL_API_KEY = os.environ.get("DEEPL_API_KEY", "")
21
+ DEEPL_API_URL = "https://api-free.deepl.com/v2/translate" # Use api.deepl.com for paid plans
22
+
23
+ # ============================================================
24
+ # Path Configuration
25
+ # ============================================================
26
+ PROJECT_ROOT = Path(__file__).parent.parent
27
+ FRONTEND_DIR = PROJECT_ROOT / "frontend"
28
+ DATA_DIR = PROJECT_ROOT / "data"
29
+
30
+ # Feedback file location - stored in the project's data/ directory (DATA_DIR), not the absolute /data mount; confirm this path persists on HF Spaces
31
+ FEEDBACK_FILE = DATA_DIR / "feedback.json"
32
+
33
+ # Translation cache file - persistent across restarts
34
+ TRANSLATION_CACHE_FILE = DATA_DIR / "translations_cache.json"
35
+
36
+ # ============================================================
37
+ # Initialize FastAPI
38
+ # ============================================================
39
+ app = FastAPI(title="UAE Knowledge System", version="2.3.0")
40
+
41
+
42
+ # ============================================================
43
+ # Request/Response Models
44
+ # ============================================================
45
class SearchRequest(BaseModel):
    """Request body for POST /api/search."""

    # Query text handed to the knowledge-base search.
    query: str
    # Category sent by the client; the search endpoint echoes it back in its response.
    category: str
48
+
49
+
50
class FeedbackRequest(BaseModel):
    """Request body for POST /api/feedback; every field is stored with the feedback entry."""

    # Query and category the feedback refers to.
    query: str
    category: str
    # Two-level mapping of integer ratings.
    # NOTE(review): presumably entity -> criterion -> score; confirm against the frontend.
    entity_ratings: Dict[str, Dict[str, int]]
    # Free-text notes submitted with the feedback.
    notes: str
    # Result strings submitted with the feedback; saved as-is.
    results: List[str]
56
+
57
+
58
class TranslateRequest(BaseModel):
    """Request body for POST /api/translate."""

    texts: List[str]  # List of texts to translate
    # "ar" and "cn" (any case) are mapped to DeepL's AR / ZH codes; any other
    # value is upper-cased and sent to DeepL unchanged.
    target_lang: str
61
+
62
+
63
+ # ============================================================
64
+ # Translation Cache (file-based, persistent across restarts)
65
+ # ============================================================
66
+ _translation_cache: Dict[str, str] = {} # {text:lang: translated}
67
+
68
+
69
def load_translation_cache() -> None:
    """Load the persisted translation cache into ``_translation_cache``.

    Reads ``TRANSLATION_CACHE_FILE`` as JSON. The load is best-effort: a
    missing file, an unreadable or invalid file, or a JSON document that is
    not an object all leave the cache as an empty dict instead of raising.
    """
    global _translation_cache
    # Start from a known-good empty cache; it is only replaced on success.
    _translation_cache = {}

    if not TRANSLATION_CACHE_FILE.exists():
        return

    try:
        with open(TRANSLATION_CACHE_FILE, "r", encoding="utf-8") as f:
            loaded = json.load(f)
    except Exception as e:
        print(f"Error loading translation cache: {e}")
        return

    # Valid JSON is not necessarily a mapping; a list or scalar here would
    # make later ``cache[key] = value`` writes fail.
    if not isinstance(loaded, dict):
        print(
            "Error loading translation cache: expected a JSON object, "
            f"got {type(loaded).__name__}"
        )
        return

    _translation_cache = loaded
    print(f"Loaded {len(_translation_cache)} cached translations")
82
+
83
+
84
def save_translation_cache() -> None:
    """Write the in-memory translation cache to ``TRANSLATION_CACHE_FILE``.

    Best-effort: the data directory is created when missing, and any failure
    is printed rather than raised.
    """
    try:
        DATA_DIR.mkdir(parents=True, exist_ok=True)
        with open(TRANSLATION_CACHE_FILE, "w", encoding="utf-8") as cache_file:
            json.dump(
                _translation_cache,
                cache_file,
                ensure_ascii=False,
                indent=2,
            )
    except Exception as exc:
        print(f"Error saving translation cache: {exc}")
92
+
93
+
94
async def translate_with_deepl(texts: List[str], target_lang: str) -> List[str]:
    """Translate English ``texts`` with the DeepL API, using the local cache.

    Args:
        texts: Source strings; they are sent with ``source_lang="EN"``.
        target_lang: ``"ar"`` / ``"cn"`` (any case) are mapped to DeepL's
            ``AR`` / ``ZH``; any other value is upper-cased and passed through.

    Returns:
        A list with the same length and order as ``texts``. Any entry that
        could not be translated (no API key, non-200 response, exception, or
        an entry missing from the response) is returned untranslated.
    """
    if not DEEPL_API_KEY:
        return texts  # Return original if no API key

    # Map our language codes to DeepL codes
    lang_map = {"ar": "AR", "cn": "ZH"}
    deepl_lang = lang_map.get(target_lang.lower(), target_lang.upper())

    # Start from the originals so every slot already holds its fallback; no
    # placeholder can leak out if the response is short or the call fails.
    results = list(texts)

    # Uncached text -> every position it occupies. Duplicates are sent once.
    pending: Dict[str, List[int]] = {}
    for i, text in enumerate(texts):
        cache_key = f"{text}:{deepl_lang}"
        if cache_key in _translation_cache:
            results[i] = _translation_cache[cache_key]
        else:
            pending.setdefault(text, []).append(i)

    if not pending:
        return results

    texts_to_translate = list(pending)
    try:
        async with httpx.AsyncClient() as client:
            response = await client.post(
                DEEPL_API_URL,
                # DeepL documents header-based authentication; the key is no
                # longer placed in the form body.
                headers={"Authorization": f"DeepL-Auth-Key {DEEPL_API_KEY}"},
                data={
                    "text": texts_to_translate,
                    "target_lang": deepl_lang,
                    "source_lang": "EN",
                },
                timeout=30.0,
            )

        if response.status_code != 200:
            print(f"Translation error: DeepL returned HTTP {response.status_code}")
            return results

        translations = response.json().get("translations", [])

        cache_changed = False
        # zip() stops at the shorter sequence, so a short or over-long
        # response can neither raise nor misalign texts and translations.
        for text, trans in zip(texts_to_translate, translations):
            translated_text = trans.get("text")
            if translated_text is None:
                # Do not cache the untranslated source as if it were a
                # translation; the slot keeps its original text.
                continue
            for idx in pending[text]:
                results[idx] = translated_text
            _translation_cache[f"{text}:{deepl_lang}"] = translated_text
            cache_changed = True

        # Save cache to file only when something new was learned
        if cache_changed:
            save_translation_cache()

    except Exception as e:
        # Best-effort: untranslated slots already hold the original text.
        print(f"Translation error: {e}")

    return results
159
+
160
+
161
+ # ============================================================
162
+ # API Endpoints
163
+ # ============================================================
164
@app.get("/", response_class=HTMLResponse)
async def root():
    """Serve ``index.html`` from the frontend directory, or a placeholder page if it is missing."""
    index_page = FRONTEND_DIR / "index.html"
    if not index_page.exists():
        return HTMLResponse("<h1>UAE Knowledge System</h1><p>index.html not found</p>")
    return FileResponse(index_page)
171
+
172
+
173
@app.get("/api/stats")
async def api_stats():
    """Return the statistics dictionary produced by ``get_stats()``."""
    stats = get_stats()
    return stats
177
+
178
+
179
@app.post("/api/search")
async def api_search(request: SearchRequest):
    """Search the knowledge base and return the top five matches.

    Returns:
        On success, a dict with ``results``, the echoed ``query`` and
        ``category``, and the sensitivity fields (currently always
        ``False`` / ``None``). On failure, ``{"error": <message>}``.

    NOTE: ``request.category`` is only echoed back; it is not passed to the
    search and does not filter results.
    """
    try:
        results = search_knowledge_base(request.query, top_k=5)
    except Exception as e:
        import traceback

        # Keep the stack trace in the server log. Returning it to the client
        # would expose file paths and internals to anyone who can call the API.
        traceback.print_exc()
        return {"error": str(e)}

    return {
        "results": results,
        "query": request.query,
        "category": request.category,
        "is_sensitive": False,
        "sensitive_topic": None,
        "sensitive_guidance": None,
    }
195
+
196
+
197
@app.post("/api/feedback")
async def api_feedback(request: FeedbackRequest, req: Request):
    """Append one feedback entry to ``FEEDBACK_FILE``.

    The entry holds a timestamp, the client IP and every field of the
    request. The whole feedback list is re-serialised on each call.

    Returns:
        ``{"success": True, "total": <entry count>}`` on success, otherwise
        ``{"success": False, "error": <message>}``.
    """
    try:
        # Ensure data directory exists
        DATA_DIR.mkdir(parents=True, exist_ok=True)

        # Prefer the first hop of X-Forwarded-For and fall back to the socket
        # peer. The header is client-supplied, so treat the value as
        # informational only.
        client_ip = req.headers.get("x-forwarded-for", "").split(",")[0].strip()
        if not client_ip:
            client_ip = req.client.host if req.client else "unknown"

        feedback = {
            "timestamp": datetime.now().isoformat(),
            "client_ip": client_ip,
            "query": request.query,
            "category": request.category,
            "entity_ratings": request.entity_ratings,
            "notes": request.notes,
            "results": request.results,
        }

        # Load existing feedback
        if FEEDBACK_FILE.exists():
            with open(FEEDBACK_FILE, "r", encoding="utf-8") as f:
                all_feedback = json.load(f)
            if not isinstance(all_feedback, list):
                raise ValueError("feedback file does not contain a JSON list")
        else:
            all_feedback = []

        all_feedback.append(feedback)

        # Write to a sibling temp file, then swap it into place. An interrupted
        # write then cannot truncate or corrupt the feedback already collected.
        tmp_file = FEEDBACK_FILE.with_name(FEEDBACK_FILE.name + ".tmp")
        with open(tmp_file, "w", encoding="utf-8") as f:
            json.dump(all_feedback, f, ensure_ascii=False, indent=2)
        tmp_file.replace(FEEDBACK_FILE)

        return {"success": True, "total": len(all_feedback)}

    except Exception as e:
        return {"success": False, "error": str(e)}
236
+
237
+
238
@app.post("/api/translate")
async def api_translate(request: TranslateRequest):
    """Translate the requested texts through DeepL.

    The response always carries a ``translations`` list; when no API key is
    configured or an exception occurs, it holds the original texts together
    with ``success: False`` and an ``error`` message.
    """
    try:
        if DEEPL_API_KEY:
            translated = await translate_with_deepl(
                request.texts, request.target_lang
            )
            return {
                "success": True,
                "translations": translated,
                "target_lang": request.target_lang,
            }

        # No key configured: hand the untranslated texts back.
        return {
            "success": False,
            "error": "Translation not configured (DEEPL_API_KEY not set)",
            "translations": request.texts,
        }
    except Exception as exc:
        # Fall back to the original texts on any failure.
        return {
            "success": False,
            "error": str(exc),
            "translations": request.texts,
        }
261
+
262
+
263
@app.get("/api/translate/status")
async def api_translate_status():
    """Report whether a DeepL API key is configured."""
    configured = bool(DEEPL_API_KEY)
    provider = "DeepL" if configured else None
    return {"available": configured, "provider": provider}
270
+
271
+
272
+ # ============================================================
273
+ # Static Files - Serve frontend assets
274
+ # ============================================================
275
# Mount the frontend asset folders: CSS at /css, JavaScript at /js and
# images at /assets. Each URL prefix and mount name equals the folder name.
# NOTE(review): StaticFiles presumably validates the directory when it is
# constructed, so a missing folder would fail at import time - confirm.
for _asset_folder in ("css", "js", "assets"):
    app.mount(
        f"/{_asset_folder}",
        StaticFiles(directory=str(FRONTEND_DIR / _asset_folder)),
        name=_asset_folder,
    )
283
+
284
+
285
+ # ============================================================
286
+ # Startup Event
287
+ # ============================================================
288
@app.on_event("startup")
async def startup_event():
    """Load the translation cache, knowledge base and retriever at startup.

    The calls below run synchronously inside the startup hook (not in the
    background), so they complete before "System ready!" is printed.
    """
    print("Starting UAE Knowledge System API...")

    # Restore translations persisted by earlier runs.
    load_translation_cache()

    # Trigger the lazy loaders now so the first request does not pay for them.
    for warm_up in (get_knowledge_base, get_retriever):
        warm_up()

    print("System ready!")
298
+
299
+
300
+ # ============================================================
301
+ # Run with Uvicorn (for direct execution)
302
+ # ============================================================
303
if __name__ == "__main__":
    # NOTE(review): this module imports ``.services`` relatively, so it
    # presumably has to be started as a package module (for example
    # ``python -m backend.api``) for this branch to be reached - confirm.
    import uvicorn
    # Listen on all interfaces on port 7860.
    uvicorn.run(app, host="0.0.0.0", port=7860)
backend/services.py ADDED
@@ -0,0 +1,133 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """
2
+ UAE Knowledge System - Backend Services
3
+ Handles knowledge base and retriever initialization
4
+ """
5
+ import sys
6
+ from pathlib import Path
7
+
8
+ # Add parent directory to path for imports
9
+ sys.path.insert(0, str(Path(__file__).parent.parent))
10
+
11
+ from ir.retriever import EntityRetriever, RetrievalOutput
12
+ from ir.knowledge_base import KnowledgeBase
13
+
14
+ # ============================================================
15
+ # Global State
16
+ # ============================================================
17
+ _retriever = None
18
+ _knowledge_base = None
19
+
20
+ # Paths relative to project root
21
+ PROJECT_ROOT = Path(__file__).parent.parent
22
+ INDEX_CACHE_PATH = PROJECT_ROOT / "ir" / "cache" / "dense_index"
23
+
24
+
25
def get_knowledge_base() -> KnowledgeBase:
    """Return the module-wide KnowledgeBase, constructing it on first use."""
    global _knowledge_base
    if _knowledge_base is not None:
        return _knowledge_base

    print("Loading knowledge base...")
    _knowledge_base = KnowledgeBase(debug=False)
    return _knowledge_base
32
+
33
+
34
def get_retriever():
    """Return the module-wide dense retriever, creating it on first use.

    On first use the retriever's index is loaded from ``INDEX_CACHE_PATH``
    when that path exists and loads successfully; otherwise the index is
    built from the knowledge base and saved back to that path.
    """
    global _retriever
    if _retriever is None:
        # Imported here rather than at module import time.
        from ir.retrievers.dense import DenseRetriever

        print("Loading dense retriever...")
        dense = DenseRetriever(model_name="bge-m3", debug=False)
        knowledge_base = get_knowledge_base()
        index_location = str(INDEX_CACHE_PATH)

        if not INDEX_CACHE_PATH.exists():
            # No cache yet: build, make sure the parent folder exists, save.
            print("Building dense index (this may take a while)...")
            dense.build_index_from_knowledge_base(knowledge_base)
            INDEX_CACHE_PATH.parent.mkdir(parents=True, exist_ok=True)
            dense.save_index(index_location)
            print("Index built and cached!")
        else:
            print(f"Loading cached index from {INDEX_CACHE_PATH}...")
            if dense.load_index(index_location):
                print("Cached index loaded!")
            else:
                # Cache present but unusable: rebuild and overwrite it.
                print("Cache load failed, building index...")
                dense.build_index_from_knowledge_base(knowledge_base)
                dense.save_index(index_location)

        # Publish only after the index is ready, so a failure above leaves
        # the global unset and the next call retries.
        _retriever = dense

    return _retriever
64
+
65
+
66
def search_knowledge_base(query: str, top_k: int = 5):
    """Search the knowledge base and return display-ready result dicts.

    Args:
        query: Text passed to the retriever's ``search``.
        top_k: Maximum number of hits requested from the retriever.

    Returns:
        One dict per hit with ``entity_id``, ``entity_name``, ``score``,
        ``chunk_type``, ``subcategory``, ``emirate``, ``is_royal``,
        ``summary`` and ``must_answer`` (at most five facts). When the
        knowledge base has a raw record for the entity, the dict also
        carries it under ``full_entity``.
    """
    retriever = get_retriever()
    kb = get_knowledge_base()

    formatted_results = []
    for metadata, score in retriever.search(query, top_k=top_k):
        entity_id = metadata.get("entity_id", "")

        # Get full entity data from KB
        raw_data = kb.get_raw_entity(entity_id) if entity_id else None

        result = {
            "entity_id": entity_id,
            "entity_name": metadata.get("entity_name", "Unknown"),
            "score": score,
            "chunk_type": metadata.get("chunk_type", ""),
            "subcategory": "",
            "emirate": "",
            "is_royal": False,
            "summary": "",
            "must_answer": [],
        }

        if raw_data:
            # ``or {}`` / ``or []`` rather than a .get() default: a key that
            # is present with a null value would otherwise yield None and
            # crash on the following .get() or slice.
            facts_data = raw_data.get("facts") or {}
            metadata_kb = raw_data.get("metadata") or {}

            result["subcategory"] = raw_data.get("subcategory", "")
            result["emirate"] = metadata_kb.get("emirate", "")
            result["is_royal"] = metadata_kb.get("is_royal", False)
            result["summary"] = facts_data.get("summary_paragraph", "")

            # Extract must-answer facts (first five only)
            must_answer = facts_data.get("must_answer") or []
            result["must_answer"] = [
                fact.get("fact", fact) if isinstance(fact, dict) else str(fact)
                for fact in must_answer[:5]
            ]

            # Include full entity data for detailed view
            result["full_entity"] = raw_data

        formatted_results.append(result)

    return formatted_results
119
+
120
+
121
def get_stats():
    """Return knowledge-base statistics for the stats endpoint.

    Returns:
        A dict with ``entities`` (number of entities in the knowledge base),
        ``categories`` (fixed at 8) and ``version``. If the knowledge base
        cannot be loaded, ``entities`` is 0 and an ``error`` message is
        added; the other keys are still present, so both outcomes share one
        shape.
    """
    # Fixed: 8 knowledge categories as defined in the system
    stats = {"entities": 0, "categories": 8, "version": "2.3.0"}
    try:
        stats["entities"] = len(get_knowledge_base().entities)
    except Exception as e:
        stats["error"] = str(e)
    return stats