"""
UAE Knowledge System - FastAPI Backend

Serves the HTML frontend and provides the search, feedback, rating and
translation APIs.
"""
import json
import os
import traceback
from datetime import datetime
from pathlib import Path
from typing import Dict, List, Optional

import httpx
from fastapi import FastAPI, Request
from fastapi.staticfiles import StaticFiles
from fastapi.responses import HTMLResponse, FileResponse
from pydantic import BaseModel
# Optional: pull settings from the project-level .env file. When
# python-dotenv is not installed the process environment is used as-is.
# This runs before the service imports below.
_env_file = Path(__file__).parent.parent / ".env"
try:
    from dotenv import load_dotenv

    load_dotenv(_env_file)
except ImportError:
    pass

from .services import (
    get_knowledge_base,
    get_retriever,
    search_knowledge_base,
    get_stats,
)

# Optional: the sheets_storage module lives in the sibling "ir" directory,
# so that directory is put at the front of sys.path before importing it.
# `sheets_storage` is left as None when the import is not possible.
import sys

_ir_dir = Path(__file__).parent.parent / "ir"
sys.path.insert(0, str(_ir_dir))
try:
    import sheets_storage
except ImportError:
    sheets_storage = None
def is_sheets_enabled():
    """Report whether sheets storage can be used right now.

    Evaluated on every call rather than cached: returns ``False`` when the
    optional ``sheets_storage`` module could not be imported, otherwise
    whatever ``sheets_storage.is_sheets_enabled()`` reports.
    """
    if sheets_storage is not None:
        return sheets_storage.is_sheets_enabled()
    return False
# DeepL translation settings.
# The API key is read from the environment; an empty key disables translation.
DEEPL_API_KEY = os.environ.get("DEEPL_API_KEY", "")
# The endpoint defaults to DeepL's free-tier host. Set the DEEPL_API_URL
# environment variable to target a different host (e.g. the one used for
# paid plans) without editing the code. An empty value falls back to the
# default.
DEEPL_API_URL = (
    os.environ.get("DEEPL_API_URL") or "https://api-free.deepl.com/v2/translate"
)
# Filesystem layout, anchored two levels above this file.
PROJECT_ROOT = Path(__file__).parent.parent
FRONTEND_DIR = PROJECT_ROOT.joinpath("frontend")  # static site: index.html, css/, js/, assets/
DATA_DIR = PROJECT_ROOT.joinpath("data")          # JSON files written by the API

# Where /api/feedback appends its records.
FEEDBACK_FILE = DATA_DIR.joinpath("feedback.json")

# On-disk copy of the translation cache.
TRANSLATION_CACHE_FILE = DATA_DIR.joinpath("translations_cache.json")
# The ASGI application; every route, mount and startup hook below is
# registered on this instance.
app = FastAPI(
    title="UAE Knowledge System",
    version="2.3.0",
)
class SearchRequest(BaseModel):
    # Request body for POST /api/search.
    # Documented with comments rather than a docstring so the generated
    # OpenAPI schema stays exactly as it was.

    # Free-text query passed to search_knowledge_base().
    query: str
    # Echoed back in the response; the search handler does not filter on it.
    category: str
class FeedbackRequest(BaseModel):
    # Request body for POST /api/feedback. Every field is stored unchanged in
    # the record appended to the feedback file.

    # The query the feedback refers to.
    query: str
    # Category that accompanied the query.
    category: str
    # Nested mapping of integer ratings.
    # NOTE(review): presumably entity id -> rating name -> value; confirm
    # against the frontend that produces it.
    entity_ratings: Dict[str, Dict[str, int]]
    # Free-text notes from the user.
    notes: str
    # Result identifiers/strings the user was shown (stored as given).
    results: List[str]
class TranslateRequest(BaseModel):
    # Request body for POST /api/translate.

    # Strings to translate; the response list is in the same order.
    texts: List[str]
    # Target language code. "ar" and "cn" are mapped to DeepL's "AR" and
    # "ZH"; any other value is upper-cased and sent as-is.
    target_lang: str
class RatingRequest(BaseModel):
    # Request body for POST /api/rating (one rating, saved on click).

    # Query and category the rated result belongs to.
    query: str
    category: str
    # Identifier of the rated entity.
    entity_id: str
    # Position of the entity in the result list. The handler reports
    # entity_index + 1 as the rank, so this is presumably zero-based
    # (confirm against the frontend).
    entity_index: int
    # Rating name and its integer value; the handler joins them as
    # "<rating_type>:<rating_value>" when writing to sheets storage.
    rating_type: str
    rating_value: int
class EntityFeedbackRequest(BaseModel):
    # Request body for POST /api/entity-feedback (ratings plus a comment for
    # one entity of one query).

    # Client-supplied identifier, text and timestamp of the query.
    query_id: str
    query: str
    query_timestamp: str
    # The entity being rated.
    entity_id: str
    entity_name: str
    # Rank and score the entity had in the result list.
    rank_position: int
    rank_score: float
    # Rating name -> True / False / None.
    # NOTE(review): None presumably means "not answered"; confirm.
    ratings: Dict[str, Optional[bool]]
    # Free-text comment.
    comment: str
    # Client-supplied submission timestamp (stored as given).
    submitted_at: str
# In-memory translation cache, keyed by "<source text>:<DeepL language code>".
_translation_cache: Dict[str, str] = {}


def load_translation_cache() -> None:
    """Replace the in-memory translation cache with the contents of the cache file.

    The cache ends up empty when the file does not exist, cannot be read or
    parsed, or does not contain a JSON object. Problems are reported with
    ``print`` and never raised, so a damaged cache file cannot stop startup.
    """
    global _translation_cache

    loaded: Dict[str, str] = {}
    if TRANSLATION_CACHE_FILE.exists():
        try:
            with open(TRANSLATION_CACHE_FILE, "r", encoding="utf-8") as f:
                data = json.load(f)
        except Exception as e:
            print(f"Error loading translation cache: {e}")
        else:
            # Valid JSON is not enough: a list or scalar here would break
            # every later `cache[key] = value` in the translation code.
            if isinstance(data, dict):
                loaded = data
                print(f"Loaded {len(loaded)} cached translations")
            else:
                print(
                    "Error loading translation cache: expected a JSON object, "
                    f"got {type(data).__name__}"
                )

    _translation_cache = loaded
def save_translation_cache() -> None:
    """Write the in-memory translation cache to the cache file.

    The JSON is first written to a temporary file next to the target and then
    moved into place with ``os.replace``. A failure while serialising or
    writing therefore leaves the previous cache file intact instead of
    truncated. Errors are reported with ``print`` and never raised.
    """
    tmp_path = TRANSLATION_CACHE_FILE.with_name(TRANSLATION_CACHE_FILE.name + ".tmp")
    try:
        DATA_DIR.mkdir(parents=True, exist_ok=True)
        with open(tmp_path, "w", encoding="utf-8") as f:
            json.dump(_translation_cache, f, ensure_ascii=False, indent=2)
        os.replace(tmp_path, TRANSLATION_CACHE_FILE)
    except Exception as e:
        print(f"Error saving translation cache: {e}")
        # Best-effort cleanup of a partially written temp file; it may not
        # exist at all if the failure happened before it was created.
        try:
            tmp_path.unlink()
        except OSError:
            pass
async def translate_with_deepl(texts: List[str], target_lang: str) -> List[str]:
    """Translate ``texts`` with the DeepL API, using the translation cache.

    Args:
        texts: Source strings; they are sent to DeepL with ``source_lang="EN"``.
        target_lang: Target language code. ``"ar"`` and ``"cn"`` map to
            DeepL's ``"AR"`` and ``"ZH"``; anything else is upper-cased.

    Returns:
        A list the same length and order as ``texts``. Each entry is the
        cached or freshly fetched translation, or the original string when
        no translation could be obtained (no API key, HTTP error, network
        error, or DeepL returning fewer items than requested). The result
        never contains ``None``.

    Never raises: any failure is printed and the originals are returned for
    the affected entries.
    """
    if not DEEPL_API_KEY:
        return texts

    lang_map = {"ar": "AR", "cn": "ZH"}
    deepl_lang = lang_map.get(target_lang.lower(), target_lang.upper())

    # Start from the originals so every slot holds a usable string even if
    # the request below fails or comes back incomplete.
    results = list(texts)
    pending_indices = []
    pending_texts = []
    for i, text in enumerate(texts):
        cached = _translation_cache.get(f"{text}:{deepl_lang}")
        if cached is not None:
            results[i] = cached
        else:
            pending_indices.append(i)
            pending_texts.append(text)

    if not pending_texts:
        return results

    try:
        async with httpx.AsyncClient() as client:
            response = await client.post(
                DEEPL_API_URL,
                headers={
                    "Authorization": f"DeepL-Auth-Key {DEEPL_API_KEY}"
                },
                data={
                    "text": pending_texts,
                    "target_lang": deepl_lang,
                    "source_lang": "EN"
                },
                timeout=30.0
            )

        if response.status_code != 200:
            print(f"Translation error: DeepL returned HTTP {response.status_code}")
            return results

        translations = response.json().get("translations", [])

        # zip() stops at the shorter sequence, so a short or over-long
        # response can neither leave gaps nor index out of range.
        cache_changed = False
        for idx, source, item in zip(pending_indices, pending_texts, translations):
            translated = item.get("text")
            if translated is None:
                # Keep the original and do not cache it as a "translation".
                continue
            results[idx] = translated
            _translation_cache[f"{source}:{deepl_lang}"] = translated
            cache_changed = True

        if cache_changed:
            save_translation_cache()

    except Exception as e:
        # Entries translated before the failure are kept; the rest remain
        # the original strings.
        print(f"Translation error: {e}")

    return results
@app.get("/", response_class=HTMLResponse)
async def root():
    """Return the frontend's index.html, or a small placeholder page if it is missing."""
    index_file = FRONTEND_DIR / "index.html"
    if not index_file.exists():
        return HTMLResponse("<h1>UAE Knowledge System</h1><p>index.html not found</p>")
    return FileResponse(index_file)
@app.get("/api/stats")
async def api_stats():
    """Return the knowledge base statistics produced by ``get_stats()``."""
    stats = get_stats()
    return stats
@app.post("/api/search")
async def api_search(request: SearchRequest):
    """Run a knowledge base search for ``request.query``.

    Asks ``search_knowledge_base`` for the top 100 matches and returns them
    with the query and category echoed back. The ``is_sensitive`` /
    ``sensitive_*`` fields are always ``False`` / ``None`` here.

    On failure the response is ``{"error": <message>}``. The stack trace is
    written to the server log only; it is deliberately not sent to the
    client, since it exposes internal paths and code structure.
    """
    try:
        results = search_knowledge_base(request.query, top_k=100)
        return {
            "results": results,
            "query": request.query,
            "category": request.category,
            "is_sensitive": False,
            "sensitive_topic": None,
            "sensitive_guidance": None
        }
    except Exception as e:
        print(f"Search failed for query {request.query!r}: {e}")
        traceback.print_exc()
        return {"error": str(e)}
@app.post("/api/feedback")
async def api_feedback(request: FeedbackRequest, req: Request):
    """Append one feedback record to the feedback JSON file.

    The record holds the submitted fields plus a server timestamp and the
    client IP (first ``X-Forwarded-For`` entry, else the socket peer, else
    ``"unknown"``).

    Returns ``{"success": True, "total": <record count>}``, or
    ``{"success": False, "error": <message>}`` if anything raises.
    """
    try:
        DATA_DIR.mkdir(parents=True, exist_ok=True)

        # Prefer the proxy-supplied address; fall back to the direct peer.
        forwarded_for = req.headers.get("x-forwarded-for", "")
        client_ip = forwarded_for.split(",")[0].strip() or (
            req.client.host if req.client else "unknown"
        )

        entry = {
            "timestamp": datetime.now().isoformat(),
            "client_ip": client_ip,
            "query": request.query,
            "category": request.category,
            "entity_ratings": request.entity_ratings,
            "notes": request.notes,
            "results": request.results
        }

        # Read the existing list (if any), append, and rewrite the file.
        history = []
        if FEEDBACK_FILE.exists():
            with open(FEEDBACK_FILE, "r", encoding="utf-8") as fh:
                history = json.load(fh)
        history.append(entry)

        with open(FEEDBACK_FILE, "w", encoding="utf-8") as fh:
            json.dump(history, fh, ensure_ascii=False, indent=2)

        return {"success": True, "total": len(history)}

    except Exception as exc:
        return {"success": False, "error": str(exc)}
@app.post("/api/rating")
async def api_rating(request: RatingRequest, req: Request):
    """Store a single entity rating as soon as the user clicks it.

    The rating is first offered to sheets storage (when enabled); a failure
    there is printed and otherwise ignored. It is then always appended to
    ``ratings.json`` in the data directory.

    Returns ``{"success": True, "total": <record count>, "sheets_saved":
    <result of the sheets save>}``, or ``{"success": False, "error":
    <message>}`` if anything outside the sheets call raises.
    """
    try:
        DATA_DIR.mkdir(parents=True, exist_ok=True)

        # Prefer the proxy-supplied address; fall back to the direct peer.
        forwarded_for = req.headers.get("x-forwarded-for", "")
        client_ip = forwarded_for.split(",")[0].strip() or (
            req.client.host if req.client else "unknown"
        )

        ratings_path = DATA_DIR / "ratings.json"

        entry = {
            "timestamp": datetime.now().isoformat(),
            "client_ip": client_ip,
            "query": request.query,
            "category": request.category,
            "entity_id": request.entity_id,
            "entity_index": request.entity_index,
            "rating_type": request.rating_type,
            "rating_value": request.rating_value
        }

        # Best-effort copy to sheets storage; the local file is the fallback.
        sheets_saved = False
        if is_sheets_enabled() and sheets_storage:
            try:
                sheets_saved = sheets_storage.save_rating_to_sheets(
                    query=request.query,
                    category=request.category or "Not selected",
                    entity_id=request.entity_id,
                    entity_name=request.entity_id,
                    rank=request.entity_index + 1,
                    score=0,
                    rating=f"{request.rating_type}:{request.rating_value}",
                    page=1,
                    client_ip=client_ip
                )
            except Exception as sheets_error:
                print(f"Google Sheets save failed: {sheets_error}")

        # Read the existing list (if any), append, and rewrite the file.
        history = []
        if ratings_path.exists():
            with open(ratings_path, "r", encoding="utf-8") as fh:
                history = json.load(fh)
        history.append(entry)

        with open(ratings_path, "w", encoding="utf-8") as fh:
            json.dump(history, fh, ensure_ascii=False, indent=2)

        return {"success": True, "total": len(history), "sheets_saved": sheets_saved}

    except Exception as exc:
        return {"success": False, "error": str(exc)}
@app.post("/api/entity-feedback")
async def api_entity_feedback(request: EntityFeedbackRequest, req: Request):
    """Store the ratings and comment a user gave for one entity.

    The feedback is first offered to sheets storage (when enabled), with the
    ratings serialised as a JSON string; a failure there is printed and
    otherwise ignored. It is then always appended to
    ``entity_feedbacks.json`` in the data directory.

    Returns ``{"success": True, "total": <record count>, "sheets_saved":
    <result of the sheets save>}``, or ``{"success": False, "error":
    <message>}`` if anything outside the sheets call raises.
    """
    try:
        DATA_DIR.mkdir(parents=True, exist_ok=True)

        # Prefer the proxy-supplied address; fall back to the direct peer.
        forwarded_for = req.headers.get("x-forwarded-for", "")
        client_ip = forwarded_for.split(",")[0].strip() or (
            req.client.host if req.client else "unknown"
        )

        feedbacks_path = DATA_DIR / "entity_feedbacks.json"

        entry = {
            "query_id": request.query_id,
            "query": request.query,
            "query_timestamp": request.query_timestamp,
            "user_ip": client_ip,
            "entity_id": request.entity_id,
            "entity_name": request.entity_name,
            "rank_position": request.rank_position,
            "rank_score": request.rank_score,
            "ratings": request.ratings,
            "comment": request.comment,
            "submitted_at": request.submitted_at
        }

        # Best-effort copy to sheets storage; the local file is the fallback.
        sheets_saved = False
        if is_sheets_enabled() and sheets_storage:
            try:
                if request.ratings:
                    ratings_str = json.dumps(request.ratings)
                else:
                    ratings_str = ""
                sheets_saved = sheets_storage.save_rating_to_sheets(
                    query=request.query,
                    # NOTE(review): the rank position is what gets passed as
                    # "category" here; kept exactly as in the original.
                    category=str(request.rank_position),
                    entity_id=request.entity_id,
                    entity_name=request.entity_name,
                    rank=request.rank_position,
                    score=request.rank_score,
                    rating=ratings_str,
                    page=1,
                    client_ip=client_ip,
                    comment=request.comment or "",
                    query_id=request.query_id
                )
            except Exception as sheets_error:
                print(f"Google Sheets save failed: {sheets_error}")

        # Read the existing list (if any), append, and rewrite the file.
        history = []
        if feedbacks_path.exists():
            with open(feedbacks_path, "r", encoding="utf-8") as fh:
                history = json.load(fh)
        history.append(entry)

        with open(feedbacks_path, "w", encoding="utf-8") as fh:
            json.dump(history, fh, ensure_ascii=False, indent=2)

        return {"success": True, "total": len(history), "sheets_saved": sheets_saved}

    except Exception as exc:
        return {"success": False, "error": str(exc)}
@app.post("/api/translate")
async def api_translate(request: TranslateRequest):
    """Translate ``request.texts`` into ``request.target_lang`` via DeepL.

    Every response carries a ``translations`` list. When translation is not
    configured or raises, ``success`` is ``False``, ``error`` explains why,
    and ``translations`` holds the untranslated input texts.
    """
    # Without an API key there is nothing to call; answer immediately.
    if not DEEPL_API_KEY:
        return {
            "success": False,
            "error": "Translation not configured (DEEPL_API_KEY not set)",
            "translations": request.texts
        }

    try:
        translated = await translate_with_deepl(request.texts, request.target_lang)
    except Exception as exc:
        return {
            "success": False,
            "error": str(exc),
            "translations": request.texts
        }

    return {
        "success": True,
        "translations": translated,
        "target_lang": request.target_lang
    }
@app.get("/api/translate/status")
async def api_translate_status():
    """Report whether a DeepL API key is configured, and name the provider if so."""
    has_key = bool(DEEPL_API_KEY)
    provider = "DeepL" if has_key else None
    return {"available": has_key, "provider": provider}
# Serve the frontend's static sub-directories under matching URL prefixes
# (/css, /js, /assets). Registered after the API routes above.
for _static_name in ("css", "js", "assets"):
    app.mount(
        f"/{_static_name}",
        StaticFiles(directory=str(FRONTEND_DIR / _static_name)),
        name=_static_name,
    )
@app.on_event("startup")
async def startup_event():
    """Warm up at application startup.

    Loads the translation cache, then calls ``get_knowledge_base()`` and
    ``get_retriever()`` in that order, printing a message before and after.
    """
    print("Starting UAE Knowledge System API...")

    for warm_up in (load_translation_cache, get_knowledge_base, get_retriever):
        warm_up()

    print("System ready!")
# Direct execution: serve the app with uvicorn on all interfaces, port 7860.
if __name__ == "__main__":
    import uvicorn

    uvicorn.run(
        app,
        host="0.0.0.0",
        port=7860,
    )