Spaces:

PraneshJs
/

RAG

Sleeping

App Files Files Community

PraneshJs committed on Nov 3, 2025

Commit

a9a9358

verified ·

1 Parent(s): a8404b2

fixed system prompt and response in txt

Browse files

Files changed (1) hide show

app.py +119 -234

app.py CHANGED Viewed

@@ -3,6 +3,7 @@ import os
 import asyncio
 import json
 import hashlib
 from io import BytesIO, StringIO
 from typing import List, Tuple
@@ -12,13 +13,9 @@ import faiss
 import requests
 import pandas as pd
 from sentence_transformers import SentenceTransformer
-# file parsing libs
 import fitz  # PyMuPDF
 import docx
 from pptx import Presentation
-# crawl4ai
 from crawl4ai import AsyncWebCrawler
 # ---------------- Config ----------------
@@ -26,24 +23,39 @@ OPENROUTER_API_KEY = os.getenv("OPENROUTER_API_KEY")
 OPENROUTER_MODEL = "nvidia/nemotron-nano-12b-v2-vl:free"
 EMBEDDING_MODEL_NAME = "all-MiniLM-L6-v2"
 CACHE_DIR = "./cache"
 os.makedirs(CACHE_DIR, exist_ok=True)
-# sentence-transformers embedder (loads once)
 embedder = SentenceTransformer(EMBEDDING_MODEL_NAME)
-# Global in-memory stores (cleared/updated by UI actions)
 DOCS: List[str] = []
 FILENAMES: List[str] = []
 EMBEDDINGS: np.ndarray = None
 FAISS_INDEX = None
 CURRENT_CACHE_KEY: str = ""
 # ---------------- File extraction helpers ----------------
 def extract_text_from_pdf(file_bytes: bytes) -> str:
     try:
         doc = fitz.open(stream=file_bytes, filetype="pdf")
-        pages = [page.get_text() for page in doc]
-        return "\n".join(pages)
     except Exception as e:
         return f"[PDF extraction error] {e}"
@@ -51,7 +63,7 @@ def extract_text_from_docx(file_bytes: bytes) -> str:
     try:
         f = BytesIO(file_bytes)
         doc = docx.Document(f)
-        return "\n".join([p.text for p in doc.paragraphs])
     except Exception as e:
         return f"[DOCX extraction error] {e}"
@@ -65,10 +77,7 @@ def extract_text_from_excel(file_bytes: bytes) -> str:
     try:
         f = BytesIO(file_bytes)
         df = pd.read_excel(f, dtype=str)
-        parts = []
-        for col in df.columns:
-            parts.append("\n".join(df[col].fillna("").astype(str).tolist()))
-        return "\n".join(parts)
     except Exception as e:
         return f"[EXCEL extraction error] {e}"
@@ -94,90 +103,57 @@ def extract_text_from_csv(file_bytes: bytes) -> str:
         return f"[CSV extraction error] {e}"
 def extract_text_from_file_tuple(file_tuple) -> Tuple[str, bytes]:
-    """
-    Accepts a Gradio file object/tuple and returns (filename, bytes).
-    Robust to multiple gradio versions.
-    """
-    # gradio v3.x passes TemporaryFile-like object with .name & .read()
     try:
         if hasattr(file_tuple, "name") and hasattr(file_tuple, "read"):
-            filename = os.path.basename(file_tuple.name)
-            file_bytes = file_tuple.read()
-            return filename, file_bytes
-    except Exception:
-        pass
-    # other shapes: tuple or dict-like
-    try:
-        # file_tuple may be (name, bytes)
-        if isinstance(file_tuple, tuple) and len(file_tuple) == 2 and isinstance(file_tuple[1], (bytes, bytearray)):
-            return file_tuple[0], bytes(file_tuple[1])
-    except Exception:
-        pass
-    # fallback if path string provided
-    try:
-        if isinstance(file_tuple, str) and os.path.exists(file_tuple):
-            with open(file_tuple, "rb") as fh:
-                return os.path.basename(file_tuple), fh.read()
     except Exception:
         pass
     raise ValueError("Unsupported file object passed by Gradio.")
 def extract_text_by_ext(filename: str, file_bytes: bytes) -> str:
     name = filename.lower()
-    if name.endswith(".pdf"):
-        return extract_text_from_pdf(file_bytes)
-    if name.endswith(".docx"):
-        return extract_text_from_docx(file_bytes)
-    if name.endswith(".txt"):
-        return extract_text_from_txt(file_bytes)
-    if name.endswith(".xlsx") or name.endswith(".xls"):
-        return extract_text_from_excel(file_bytes)
-    if name.endswith(".pptx"):
-        return extract_text_from_pptx(file_bytes)
-    if name.endswith(".csv"):
-        return extract_text_from_csv(file_bytes)
-    # fallback: try plain text
     return extract_text_from_txt(file_bytes)
-# ---------------- Embedding caching helpers ----------------
 def make_cache_key_for_files(files: List[Tuple[str, bytes]]) -> str:
-    """
-    Create a deterministic cache key based on filenames + sizes + sha256 of each file content.
-    """
     h = hashlib.sha256()
     for name, b in sorted(files, key=lambda x: x[0]):
-        h.update(name.encode("utf-8"))
-        h.update(str(len(b)).encode("utf-8"))
-        # update with small digest to keep speed; still robust
         h.update(hashlib.sha256(b).digest())
     return h.hexdigest()
 def cache_save_embeddings(cache_key: str, embeddings: np.ndarray, filenames: List[str]):
-    path = os.path.join(CACHE_DIR, f"{cache_key}.npz")
-    np.savez_compressed(path, embeddings=embeddings, filenames=np.array(filenames))
-    return path
 def cache_load_embeddings(cache_key: str):
     path = os.path.join(CACHE_DIR, f"{cache_key}.npz")
-    if not os.path.exists(path):
-        return None
     try:
         arr = np.load(path, allow_pickle=True)
-        embeddings = arr["embeddings"]
-        filenames = arr["filenames"].tolist()
-        return embeddings, filenames
     except Exception:
         return None
-# ---------------- FAISS helpers ----------------
 def build_faiss_index(embeddings: np.ndarray):
     global FAISS_INDEX
     if embeddings is None or len(embeddings) == 0:
         FAISS_INDEX = None
         return None
     emb = embeddings.astype("float32")
-    dim = emb.shape[1]
-    index = faiss.IndexFlatL2(dim)
     index.add(emb)
     FAISS_INDEX = index
     return index
@@ -187,84 +163,63 @@ def search_top_k(query: str, k: int = 3):
         return []
     q_emb = embedder.encode([query], convert_to_numpy=True).astype("float32")
     D, I = FAISS_INDEX.search(q_emb, k)
-    results = []
-    for dist, idx in zip(D[0], I[0]):
-        if idx < 0:
-            continue
-        results.append({
-            "index": int(idx),
-            "distance": float(dist),
-            "text": DOCS[idx],
-            "source": FILENAMES[idx]
-        })
-    return results
-# ---------------- OpenRouter minimal client ----------------
-def openrouter_chat_system_user(system_prompt: str, user_prompt: str):
     """
-    Sends only 'model' and 'messages' payload (system + user) to OpenRouter,
-    per your requirement (no max_tokens, temperature, etc).
     """
     if not OPENROUTER_API_KEY:
-        return "[OpenRouter error] OPENROUTER_API_KEY not set."
     url = "https://openrouter.ai/api/v1/chat/completions"
-    headers = {"Authorization": f"Bearer {OPENROUTER_API_KEY}", "Content-Type": "application/json"}
-    messages = []
-    if system_prompt:
-        messages.append({"role": "system", "content": system_prompt})
-    messages.append({"role": "user", "content": user_prompt})
-    payload = {"model": OPENROUTER_MODEL, "messages": messages}
     try:
         r = requests.post(url, headers=headers, json=payload, timeout=60)
         r.raise_for_status()
         obj = r.json()
-        # Expecting OpenAI-like structure: choices[0].message.content
-        if "choices" in obj and len(obj["choices"]) > 0:
             choice = obj["choices"][0]
             if "message" in choice and "content" in choice["message"]:
-                return choice["message"]["content"]
-            if "text" in choice:
-                return choice["text"]
-        # fallback: return entire partial json for debugging
-        return json.dumps(obj, indent=2)[:12000]
     except Exception as e:
         return f"[OpenRouter request error] {e}"
-# ---------------- Crawl4AI robust logic ----------------
 async def _crawl_async_get_markdown(url: str):
-    # uses default crawler settings; adjust with run config if needed
     async with AsyncWebCrawler() as crawler:
         result = await crawler.arun(url=url)
-        # prefer a success flag if present
         if hasattr(result, "success") and result.success is False:
-            # attempt to surface error
-            err = getattr(result, "error_message", None) or getattr(result, "error", None) or "[Crawl4AI unknown error]"
-            return f"[Crawl4AI error] {err}"
-        # try structured markdown first
         md_obj = getattr(result, "markdown", None)
         if md_obj:
-            # try common subfields observed in different versions
-            text = getattr(md_obj, "fit_markdown", None) or getattr(md_obj, "raw_markdown", None)
-            if text:
-                return text
-            # fallback to str(md_obj)
-            try:
-                return str(md_obj)
-            except Exception:
-                pass
-        # fallback to text or html
-        text = getattr(result, "text", None) or getattr(result, "html", None)
-        if text:
-            return text
-        # last resort: jsonify entire result (short)
-        try:
-            return json.dumps(result.__dict__, default=str)[:20000]
-        except Exception:
-            return "[Crawl4AI returned no usable fields]"
 def crawl_url_sync(url: str) -> str:
     try:
@@ -272,78 +227,40 @@ def crawl_url_sync(url: str) -> str:
     except Exception as e:
         return f"[Crawl4AI runtime error] {e}"
-# ---------------- Gradio handlers ----------------
 def upload_and_index(files):
-    """
-    files: list of file objects from Gradio. We'll extract bytes, compute cache key,
-    try to load embeddings from cache; if not found, compute embeddings and save.
-    """
     global DOCS, FILENAMES, EMBEDDINGS, CURRENT_CACHE_KEY
     if not files:
         return "No files uploaded.", ""
-    # read files into list of (name, bytes)
-    prepared = []
-    previews = []
-    for f in files:
-        name, b = extract_text_from_file_tuple(f)
-        prepared.append((name, b))
-        # short preview
-        previews.append({"name": name, "size": len(b)})
     cache_key = make_cache_key_for_files(prepared)
     CURRENT_CACHE_KEY = cache_key
-    # Try load existing embeddings
     cached = cache_load_embeddings(cache_key)
     if cached:
         emb, filenames = cached
         EMBEDDINGS = np.array(emb)
         FILENAMES = filenames
-        # Rebuild DOCS array: we still need textual content (not just embeddings)
-        DOCS = []
-        for name, b in prepared:
-            DOCS.append(extract_text_by_ext(name, b))
-        # Build faiss index
         build_faiss_index(EMBEDDINGS)
         return f"Loaded embeddings from cache ({len(FILENAMES)} docs).", json.dumps(previews)
-    # Not cached -> extract texts and embed
-    DOCS = []
-    FILENAMES = []
-    for name, b in prepared:
-        txt = extract_text_by_ext(name, b)
-        DOCS.append(txt)
-        FILENAMES.append(name)
-    # Compute embeddings
-    emb = embedder.encode(DOCS, convert_to_numpy=True, show_progress_bar=False).astype("float32")
-    EMBEDDINGS = emb
-    # Save to cache
     cache_save_embeddings(cache_key, EMBEDDINGS, FILENAMES)
-    # Build faiss
     build_faiss_index(EMBEDDINGS)
     return f"Uploaded and indexed {len(DOCS)} documents.", json.dumps(previews)
 def crawl_and_index(url: str):
     global DOCS, FILENAMES, EMBEDDINGS, CURRENT_CACHE_KEY
     if not url:
         return "No URL provided.", ""
     crawled = crawl_url_sync(url)
     if crawled.startswith("[Crawl4AI"):
         return crawled, ""
-    # create a cache key based on url and content
-    key_hash = hashlib.sha256()
-    key_hash.update(url.encode("utf-8"))
-    key_hash.update(crawled.encode("utf-8"))
-    cache_key = key_hash.hexdigest()
-    CURRENT_CACHE_KEY = cache_key
-    cached = cache_load_embeddings(cache_key)
     if cached:
         emb, filenames = cached
         EMBEDDINGS = np.array(emb)
@@ -351,92 +268,60 @@ def crawl_and_index(url: str):
         DOCS = [crawled]
         build_faiss_index(EMBEDDINGS)
         return f"Crawled and loaded embeddings from cache for {url}", crawled[:2000]
-    # Not cached -> index
-    DOCS = [crawled]
-    FILENAMES = [url]
-    emb = embedder.encode(DOCS, convert_to_numpy=True, show_progress_bar=False).astype("float32")
-    EMBEDDINGS = emb
-    cache_save_embeddings(cache_key, EMBEDDINGS, FILENAMES)
     build_faiss_index(EMBEDDINGS)
     return f"Crawled and indexed {url}", crawled[:2000]
-def ask_question(question: str, system_prompt: str = ""):
     if not question:
         return "Please enter a question."
     if not DOCS or FAISS_INDEX is None:
-        return "No indexed documents. Upload files or crawl a site first."
-    topk = 3
-    results = search_top_k(question, k=topk)
     if not results:
         return "No relevant documents found."
-    # prepare context from top results (trim each)
-    context_blocks = []
-    meta = []
-    for r in results:
-        snippet = r["text"]
-        if len(snippet) > 1800:
-            snippet = snippet[:1800] + "\n...[truncated]"
-        context_blocks.append(f"Source: {r['source']}\n\n{snippet}\n\n---\n")
-        meta.append({"source": r["source"], "distance": r["distance"]})
-    context = "\n".join(context_blocks)
-    user_prompt = f"Use the following context to answer the question, and cite sources from the 'Source:' lines.\n\nContext:\n{context}\nQuestion: {question}\nAnswer:"
-    # Call OpenRouter with only model + messages (system & user)
-    try:
-        answer = openrouter_chat_system_user(system_prompt=system_prompt, user_prompt=user_prompt)
-    except Exception as e:
-        answer = f"[OpenRouter call failed] {e}"
-    out = {"answer": answer, "sources": meta}
-    return json.dumps(out, indent=2)
 # ---------------- Gradio UI ----------------
-with gr.Blocks(title="AI Ally (Gradio) — Crawl4AI + OpenRouter + FAISS") as demo:
-    gr.Markdown("# AI Ally — Document & Website QA\nCrawl4AI for websites, local file uploads for docs. FAISS retrieval + sentence-transformers embeddings. OpenRouter used for generation (only model + messages).")
     with gr.Tab("Documents"):
-        with gr.Row():
-            file_input = gr.File(label="Upload files", file_count="multiple", file_types=[".pdf", ".docx", ".txt", ".xlsx", ".pptx", ".csv"])
-            upload_btn = gr.Button("Upload & Index")
-        with gr.Row():
-            upload_status = gr.Textbox(label="Status", interactive=False)
-            preview_box = gr.Textbox(label="Uploads (preview JSON)", interactive=False)
         upload_btn.click(upload_and_index, inputs=[file_input], outputs=[upload_status, preview_box])
-        gr.Markdown("### Ask about the indexed documents")
-        q = gr.Textbox(label="Question", lines=5)
-        sys_prompt = gr.Textbox(label="Optional System Prompt (sent to LLM)", lines=5, value="You are a helpful assistant.")
         ask_btn = gr.Button("Ask")
-        answer_out = gr.Textbox(label="Answer JSON", interactive=False, lines=15)
-        ask_btn.click(ask_question, inputs=[q, sys_prompt], outputs=[answer_out])
     with gr.Tab("Website Crawl"):
-        with gr.Row():
-            url = gr.Textbox(label="URL to crawl (starting URL)")
-            crawl_btn = gr.Button("Crawl & Index")
-        with gr.Row():
-            crawl_status = gr.Textbox(label="Status", interactive=False)
-            crawl_preview = gr.Textbox(label="Crawl preview (first 2k chars)", interactive=False)
         crawl_btn.click(crawl_and_index, inputs=[url], outputs=[crawl_status, crawl_preview])
-        gr.Markdown("### Ask about the crawled site")
-        q2 = gr.Textbox(label="Question", lines=5)
-        sys_prompt2 = gr.Textbox(label="Optional System Prompt (sent to LLM)", lines=10, value="You are a helpful assistant.")
         ask_btn2 = gr.Button("Ask site")
-        answer_out2 = gr.Textbox(label="Answer JSON", interactive=False, lines=15)
-        ask_btn2.click(ask_question, inputs=[q2, sys_prompt2], outputs=[answer_out2])
     with gr.Tab("Settings / Info"):
-        gr.Markdown(f"- OpenRouter model: `{OPENROUTER_MODEL}`")
         gr.Markdown(f"- Embedding model: `{EMBEDDING_MODEL_NAME}`")
-        gr.Markdown("Set `OPENROUTER_API_KEY` in your environment or HF Secrets before deploying.")
-        gr.Markdown("Cache directory: `" + CACHE_DIR + "`")
-    gr.Markdown("----\nNotes: This app saves embeddings to `./cache/` using a deterministic cache key. OpenRouter calls include only `model` + `messages` (system + user) as requested.")
 if __name__ == "__main__":
-    demo.launch(server_name="0.0.0.0", server_port=7860)

 import asyncio
 import json
 import hashlib
+import shutil
 from io import BytesIO, StringIO
 from typing import List, Tuple
 import requests
 import pandas as pd
 from sentence_transformers import SentenceTransformer
 import fitz  # PyMuPDF
 import docx
 from pptx import Presentation
 from crawl4ai import AsyncWebCrawler
 # ---------------- Config ----------------
 OPENROUTER_MODEL = "nvidia/nemotron-nano-12b-v2-vl:free"
 EMBEDDING_MODEL_NAME = "all-MiniLM-L6-v2"
 CACHE_DIR = "./cache"
+SYSTEM_PROMPT = "You are a helpful assistant."
 os.makedirs(CACHE_DIR, exist_ok=True)
 embedder = SentenceTransformer(EMBEDDING_MODEL_NAME)
 DOCS: List[str] = []
 FILENAMES: List[str] = []
 EMBEDDINGS: np.ndarray = None
 FAISS_INDEX = None
 CURRENT_CACHE_KEY: str = ""
# ---------------- Periodic cache cleanup ----------------
def _purge_cache_dir() -> None:
    """Delete every entry inside CACHE_DIR while keeping the directory itself.

    The directory is never removed, so a save that happens during a cleanup
    cannot hit a moment in which CACHE_DIR does not exist.
    """
    os.makedirs(CACHE_DIR, exist_ok=True)
    for entry in os.listdir(CACHE_DIR):
        path = os.path.join(CACHE_DIR, entry)
        if os.path.isdir(path) and not os.path.islink(path):
            shutil.rmtree(path, ignore_errors=True)
        else:
            try:
                os.remove(path)
            except FileNotFoundError:
                pass  # already gone; nothing left to clean for this entry


async def clear_cache_every_5min():
    """Empty the on-disk embedding cache every five minutes, forever.

    Errors during a cleanup pass are printed and the loop keeps running.
    In-memory state (DOCS, EMBEDDINGS, FAISS_INDEX) is not touched.
    """
    while True:
        await asyncio.sleep(300)  # 5 minutes
        try:
            _purge_cache_dir()
            print("🧹 Cache cleared successfully.")
        except Exception as e:
            print(f"[Cache cleanup error] {e}")


def _start_cache_cleaner():
    """Run clear_cache_every_5min on its own event loop in a daemon thread."""
    import threading  # local import: only this launcher needs it

    worker = threading.Thread(
        target=lambda: asyncio.run(clear_cache_every_5min()),
        name="cache-cleaner",
        daemon=True,  # must not keep the process alive on shutdown
    )
    worker.start()
    return worker


# Launch the cleaner in background.
# A task created at import time with asyncio.get_event_loop().create_task()
# is only scheduled: nothing in this module runs that loop, so the coroutine
# would never execute. A dedicated thread with its own loop does run it.
_start_cache_cleaner()
 # ---------------- File extraction helpers ----------------
 def extract_text_from_pdf(file_bytes: bytes) -> str:
     """Return the text of every page of a PDF, joined with newlines.

     Args:
         file_bytes: Raw bytes of the PDF file.

     Returns:
         The extracted text, or a string starting with
         "[PDF extraction error]" when the document cannot be opened or read.
     """
     try:
         # The context manager closes the document (and its buffers) even
         # when a page fails to extract; previously it was never closed.
         with fitz.open(stream=file_bytes, filetype="pdf") as doc:
             return "\n".join(page.get_text() for page in doc)
     except Exception as e:
         return f"[PDF extraction error] {e}"
     try:
         f = BytesIO(file_bytes)
         doc = docx.Document(f)
+        return "\n".join(p.text for p in doc.paragraphs)
     except Exception as e:
         return f"[DOCX extraction error] {e}"
     try:
         f = BytesIO(file_bytes)
         df = pd.read_excel(f, dtype=str)
+        return "\n".join("\n".join(df[col].fillna("").astype(str).tolist()) for col in df.columns)
     except Exception as e:
         return f"[EXCEL extraction error] {e}"
         return f"[CSV extraction error] {e}"
 def extract_text_from_file_tuple(file_tuple) -> Tuple[str, bytes]:
     """Normalise an uploaded-file value into ``(filename, raw_bytes)``.

     Accepted shapes:
       * a file-like object exposing ``.name`` and ``.read()``;
       * a ``(name, bytes-or-bytearray)`` tuple;
       * a string path to an existing file.

     Calling this twice on the same file-like object returns the same bytes,
     because the handle is rewound before reading.

     Raises:
         ValueError: if the value matches none of the accepted shapes, or the
             file-like object cannot be read.
     """
     if hasattr(file_tuple, "name") and hasattr(file_tuple, "read"):
         try:
             # Rewind so an already-consumed handle does not yield b"".
             try:
                 file_tuple.seek(0)
             except (AttributeError, OSError, ValueError):
                 pass  # not seekable: read from the current position
             return os.path.basename(file_tuple.name), file_tuple.read()
         except Exception:
             # Best effort: an unreadable handle falls through to the other
             # shapes and finally to the ValueError below.
             pass
     if isinstance(file_tuple, tuple) and len(file_tuple) == 2 and isinstance(file_tuple[1], (bytes, bytearray)):
         return file_tuple[0], bytes(file_tuple[1])
     if isinstance(file_tuple, str) and os.path.exists(file_tuple):
         with open(file_tuple, "rb") as fh:
             return os.path.basename(file_tuple), fh.read()
     raise ValueError("Unsupported file object passed by Gradio.")
 def extract_text_by_ext(filename: str, file_bytes: bytes) -> str:
     """Pick the extractor matching the file extension and run it.

     The extension check is case-insensitive. Any name that is not a PDF,
     DOCX, Excel, PPTX or CSV file is read as plain text.
     """
     lowered = filename.lower()
     if lowered.endswith(".pdf"):
         handler = extract_text_from_pdf
     elif lowered.endswith(".docx"):
         handler = extract_text_from_docx
     elif lowered.endswith(".xlsx") or lowered.endswith(".xls"):
         handler = extract_text_from_excel
     elif lowered.endswith(".pptx"):
         handler = extract_text_from_pptx
     elif lowered.endswith(".csv"):
         handler = extract_text_from_csv
     else:
         # ".txt" and every unrecognised extension share the text reader.
         handler = extract_text_from_txt
     return handler(file_bytes)
# ---------------- Cache + FAISS helpers ----------------
def make_cache_key_for_files(files: List[Tuple[str, bytes]]) -> str:
    """Return a hex SHA-256 key for a set of ``(name, content)`` pairs.

    Pairs are processed in name order, so the key does not depend on the
    order in which the files were supplied. Each pair contributes its name,
    its byte length and the SHA-256 digest of its content.
    """
    digest = hashlib.sha256()
    ordered = sorted(files, key=lambda pair: pair[0])
    for fname, payload in ordered:
        size_tag = str(len(payload)).encode()
        content_hash = hashlib.sha256(payload).digest()
        # One update with the concatenation hashes the same byte stream as
        # three consecutive updates.
        digest.update(fname.encode() + size_tag + content_hash)
    return digest.hexdigest()
 def cache_save_embeddings(cache_key: str, embeddings: np.ndarray, filenames: List[str]):
     """Write embeddings and their filenames to ``<CACHE_DIR>/<cache_key>.npz``.

     The archive is compressed and holds two arrays, "embeddings" and
     "filenames". Nothing is returned.
     """
     target = os.path.join(CACHE_DIR, f"{cache_key}.npz")
     names = np.array(filenames)
     np.savez_compressed(target, embeddings=embeddings, filenames=names)
 def cache_load_embeddings(cache_key: str):
     """Load cached embeddings for *cache_key*, if present.

     Returns:
         ``(embeddings, filenames)`` where *filenames* is a Python list, or
         ``None`` when the cache file is missing or cannot be read.
     """
     path = os.path.join(CACHE_DIR, f"{cache_key}.npz")
     if not os.path.exists(path):
         return None
     try:
         # allow_pickle=False: the arrays written by cache_save_embeddings are
         # plain numeric/unicode arrays, so unpickling is never needed, and a
         # tampered cache file cannot run code on load. An archive that does
         # need pickling raises here and is treated as a cache miss.
         # The context manager closes the underlying zip file handle.
         with np.load(path, allow_pickle=False) as arr:
             return arr["embeddings"], arr["filenames"].tolist()
     except Exception:
         return None
 def build_faiss_index(embeddings: np.ndarray):
     """Rebuild the global FAISS L2 index from *embeddings*.

     With ``None`` or an empty array the global index is cleared and ``None``
     is returned. Otherwise the vectors are cast to float32, added to a new
     flat L2 index, stored in FAISS_INDEX and returned.
     """
     global FAISS_INDEX
     has_vectors = embeddings is not None and len(embeddings) > 0
     if not has_vectors:
         FAISS_INDEX = None
         return None
     vectors = embeddings.astype("float32")
     dimension = vectors.shape[1]
     fresh_index = faiss.IndexFlatL2(dimension)
     fresh_index.add(vectors)
     # Publish only after the index is fully populated.
     FAISS_INDEX = fresh_index
     return fresh_index
         return []
     q_emb = embedder.encode([query], convert_to_numpy=True).astype("float32")
     D, I = FAISS_INDEX.search(q_emb, k)
+    return [{"index": int(i), "distance": float(d), "text": DOCS[i], "source": FILENAMES[i]} for d, i in zip(D[0], I[0]) if i >= 0]
# ---------------- OpenRouter Client ----------------
def _strip_code_fences(text: str) -> str:
    """Remove Markdown code-fence markers from *text*, keeping its content.

    Lines that consist only of a fence (three backticks, optionally followed
    by a single-word language tag such as "json") are dropped. Any remaining
    triple backticks are removed in place. Ordinary words are left alone.
    """
    kept = []
    for line in text.strip().splitlines():
        stripped = line.strip()
        if stripped.startswith("```"):
            tag = stripped[3:]
            if "`" not in tag and len(tag.split()) <= 1:
                continue  # pure fence line, with or without a language tag
        kept.append(line)
    return "\n".join(kept).replace("```", "").strip()


def openrouter_chat_system_user(user_prompt: str):
    """
    Sends user prompt to OpenRouter and expects a plain text response.

    The fixed SYSTEM_PROMPT (plus a plain-text instruction) is sent as the
    system message. Returns the model's answer with code fences removed, or
    a bracketed "[OpenRouter ...]" message when the key is missing, the
    request fails, or the response has an unexpected shape.
    """
    if not OPENROUTER_API_KEY:
        return "[OpenRouter error] Missing OPENROUTER_API_KEY."
    url = "https://openrouter.ai/api/v1/chat/completions"
    headers = {
        "Authorization": f"Bearer {OPENROUTER_API_KEY}",
        "Content-Type": "application/json",
    }
    # Tell the model explicitly to reply as plain text only
    payload = {
        "model": OPENROUTER_MODEL,
        "messages": [
            {"role": "system", "content": SYSTEM_PROMPT + " Always respond in plain text. Avoid JSON or markdown formatting."},
            {"role": "user", "content": user_prompt},
        ],
    }
    try:
        r = requests.post(url, headers=headers, json=payload, timeout=60)
        r.raise_for_status()
        obj = r.json()
        # Safely extract plain text
        if "choices" in obj and obj["choices"]:
            choice = obj["choices"][0]
            message = choice.get("message") or {}
            text = message.get("content")
            if isinstance(text, str):
                # Strip fence markers only. Replacing the substring "json"
                # everywhere would corrupt legitimate answers.
                return _strip_code_fences(text)
            if isinstance(choice.get("text"), str):
                return choice["text"].strip()
        return "[OpenRouter] Unexpected response format."
    except Exception as e:
        # UI boundary: always hand a string back to the textbox.
        return f"[OpenRouter request error] {e}"
# ---------------- Crawl4AI Logic ----------------
async def _crawl_async_get_markdown(url: str):
    """Crawl *url* once and return the best available text for it.

    Preference order: ``markdown.fit_markdown``, ``markdown.raw_markdown``,
    ``str(markdown)``, then ``result.text``, then ``result.html``. When the
    result reports ``success is False`` a "[Crawl4AI error] ..." string is
    returned. When nothing usable is present a "[Crawl4AI ...]" placeholder
    is returned.
    """
    async with AsyncWebCrawler() as crawler:
        result = await crawler.arun(url=url)
        if hasattr(result, "success") and result.success is False:
            # `or` rather than a getattr default: the default is ignored when
            # the attribute exists but is None or empty, which would render
            # as "[Crawl4AI error] None".
            reason = getattr(result, "error_message", None) or "[Unknown error]"
            return f"[Crawl4AI error] {reason}"
        md_obj = getattr(result, "markdown", None)
        if md_obj:
            return (
                getattr(md_obj, "fit_markdown", None)
                or getattr(md_obj, "raw_markdown", None)
                or str(md_obj)
            )
        return (
            getattr(result, "text", None)
            or getattr(result, "html", None)
            or "[Crawl4AI returned no usable fields]"
        )
 def crawl_url_sync(url: str) -> str:
     try:
     except Exception as e:
         return f"[Crawl4AI runtime error] {e}"
# ---------------- Gradio Handlers ----------------
def upload_and_index(files):
    """Read the uploaded files, embed them (or reuse the cache) and index them.

    Args:
        files: The upload values passed by the Gradio File component.

    Returns:
        ``(status_message, preview_json)``. *preview_json* lists each upload's
        name and size in upload order. It is an empty string when nothing was
        uploaded or an upload could not be read.

    Side effects:
        Replaces the globals DOCS, FILENAMES, EMBEDDINGS and CURRENT_CACHE_KEY
        and rebuilds the FAISS index.
    """
    global DOCS, FILENAMES, EMBEDDINGS, CURRENT_CACHE_KEY
    if not files:
        return "No files uploaded.", ""

    # Read each upload exactly once: a second .read() on the same handle
    # returns b"", which would index empty documents.
    try:
        uploads = [extract_text_from_file_tuple(f) for f in files]
    except ValueError as e:
        return f"[Upload error] {e}", ""
    previews = [{"name": n, "size": len(b)} for n, b in uploads]

    # Use the same name ordering as make_cache_key_for_files, so embedding
    # rows line up with DOCS for any upload order that maps to one cache key.
    prepared = sorted(uploads, key=lambda item: item[0])
    names = [n for n, _ in prepared]
    texts = [extract_text_by_ext(n, b) for n, b in prepared]

    cache_key = make_cache_key_for_files(prepared)
    CURRENT_CACHE_KEY = cache_key

    cached = cache_load_embeddings(cache_key)
    if cached:
        emb, cached_names = cached
        emb = np.array(emb)
        # Trust the cache only when its rows match the current documents
        # one-to-one; otherwise fall through and re-embed.
        if list(cached_names) == names and len(emb) == len(texts):
            DOCS, FILENAMES, EMBEDDINGS = texts, names, emb
            build_faiss_index(EMBEDDINGS)
            return f"Loaded embeddings from cache ({len(FILENAMES)} docs).", json.dumps(previews)

    # Keep DOCS and FILENAMES as lists, matching their declared types.
    DOCS, FILENAMES = texts, names
    EMBEDDINGS = embedder.encode(DOCS, convert_to_numpy=True, show_progress_bar=False).astype("float32")
    cache_save_embeddings(cache_key, EMBEDDINGS, FILENAMES)
    build_faiss_index(EMBEDDINGS)
    return f"Uploaded and indexed {len(DOCS)} documents.", json.dumps(previews)
 def crawl_and_index(url: str):
     global DOCS, FILENAMES, EMBEDDINGS, CURRENT_CACHE_KEY
     if not url:
         return "No URL provided.", ""
     crawled = crawl_url_sync(url)
     if crawled.startswith("[Crawl4AI"):
         return crawled, ""
+    key_hash = hashlib.sha256((url + crawled).encode()).hexdigest()
+    CURRENT_CACHE_KEY = key_hash
+    cached = cache_load_embeddings(key_hash)
     if cached:
         emb, filenames = cached
         EMBEDDINGS = np.array(emb)
         DOCS = [crawled]
         build_faiss_index(EMBEDDINGS)
         return f"Crawled and loaded embeddings from cache for {url}", crawled[:2000]
+    DOCS, FILENAMES = [crawled], [url]
+    EMBEDDINGS = embedder.encode(DOCS, convert_to_numpy=True, show_progress_bar=False).astype("float32")
+    cache_save_embeddings(key_hash, EMBEDDINGS, FILENAMES)
     build_faiss_index(EMBEDDINGS)
     return f"Crawled and indexed {url}", crawled[:2000]
def ask_question(question: str):
    """Answer *question* from the currently indexed documents.

    Retrieves up to three nearest documents, builds a context block from the
    first 1800 characters of each, and sends it with the question to the LLM.
    Returns a short message instead when the question is empty, nothing is
    indexed, or retrieval finds nothing.
    """
    if not question:
        return "Please enter a question."
    index_ready = bool(DOCS) and FAISS_INDEX is not None
    if not index_ready:
        return "No indexed data found."
    hits = search_top_k(question, k=3)
    if not hits:
        return "No relevant documents found."
    sections = []
    for hit in hits:
        origin = hit["source"]
        excerpt = hit["text"][:1800]  # cap each document's share of the prompt
        sections.append(f"Source: {origin}\n\n{excerpt}\n---\n")
    context = "\n".join(sections)
    user_prompt = (
        "Use the following context to answer the question.\n\n"
        f"Context:\n{context}\nQuestion: {question}\nAnswer:"
    )
    return openrouter_chat_system_user(user_prompt)
 # ---------------- Gradio UI ----------------
with gr.Blocks(title="AI Ally — Crawl4AI + OpenRouter + FAISS") as demo:
    gr.Markdown("# 🤖 AI Ally — Document & Website QA\nCrawl4AI for websites, file uploads for docs. FAISS retrieval + sentence-transformers + OpenRouter LLM.")

    # Tab 1: upload local documents, index them, then ask questions.
    with gr.Tab("Documents"):
        file_input = gr.File(label="Upload files", file_count="multiple",
                             file_types=[".pdf", ".docx", ".txt", ".xlsx", ".pptx", ".csv"])
        upload_btn = gr.Button("Upload & Index")
        upload_status = gr.Textbox(label="Status", interactive=False)
        preview_box = gr.Textbox(label="Uploads (preview JSON)", interactive=False)
        upload_btn.click(upload_and_index, inputs=[file_input], outputs=[upload_status, preview_box])
        gr.Markdown("### Ask about your documents")
        q = gr.Textbox(label="Question", lines=3)
        ask_btn = gr.Button("Ask")
        answer_out = gr.Textbox(label="Answer", interactive=False, lines=15)
        ask_btn.click(ask_question, inputs=[q], outputs=[answer_out])

    # Tab 2: crawl a URL, index the page, then ask questions.
    # Both tabs call the same ask_question handler.
    with gr.Tab("Website Crawl"):
        url = gr.Textbox(label="URL to crawl")
        crawl_btn = gr.Button("Crawl & Index")
        crawl_status = gr.Textbox(label="Status", interactive=False)
        crawl_preview = gr.Textbox(label="Crawl preview", interactive=False)
        crawl_btn.click(crawl_and_index, inputs=[url], outputs=[crawl_status, crawl_preview])
        q2 = gr.Textbox(label="Question", lines=3)
        ask_btn2 = gr.Button("Ask site")
        answer_out2 = gr.Textbox(label="Answer", interactive=False, lines=15)
        ask_btn2.click(ask_question, inputs=[q2], outputs=[answer_out2])

    # Tab 3: read-only information about the running configuration.
    with gr.Tab("Settings / Info"):
        gr.Markdown(f"- Model: `{OPENROUTER_MODEL}`")
        gr.Markdown(f"- Embedding model: `{EMBEDDING_MODEL_NAME}`")
        # Plain string: there is nothing to interpolate here.
        gr.Markdown("- Cache clears automatically every 5 minutes.")
        gr.Markdown(f"- System prompt is fixed internally: `{SYSTEM_PROMPT}`")

if __name__ == "__main__":
    # Listen on all interfaces on port 7860.
    demo.launch(server_name="0.0.0.0", server_port=7860, debug=True)