Ryanfafa committed on
Commit
746cdfd
·
verified ·
1 Parent(s): 9efdf3f

Update rag_engine.py

Browse files
Files changed (1) hide show
  1. rag_engine.py +39 -53
rag_engine.py CHANGED
@@ -1,18 +1,16 @@
1
  """
2
  RAG Engine
3
  Embeddings : sentence-transformers/all-MiniLM-L6-v2
4
- Vector DB : ChromaDB (local, in-memory / persistent)
5
- LLM : HuggingFace Router API (tries multiple free models)
6
  Chunking : Recursive character splitter with overlap
7
  """
8
 
9
  import os
10
  import re
11
- import requests
12
  import tempfile
13
  from typing import Tuple, List
14
 
15
- import chromadb
16
  from chromadb.config import Settings
17
  from langchain.text_splitter import RecursiveCharacterTextSplitter
18
  from langchain_community.embeddings import HuggingFaceEmbeddings
@@ -27,12 +25,12 @@ TOP_K = 4
27
  COLLECTION_NAME = "docmind_collection"
28
  CHROMA_DIR = "./chroma_db"
29
 
30
- # Free models to try in order
31
  CANDIDATE_MODELS = [
32
  "mistralai/Mistral-7B-Instruct-v0.3",
33
  "microsoft/Phi-3.5-mini-instruct",
34
- "google/gemma-2-2b-it",
35
  "HuggingFaceH4/zephyr-7b-beta",
 
36
  ]
37
 
38
 
@@ -106,66 +104,53 @@ class RAGEngine:
106
  if not hf_token:
107
  excerpt = extract_best(question, context)
108
  return (
109
- "HF_TOKEN not set. To enable AI answers:\n"
110
- "1. Get a free token at huggingface.co/settings/tokens\n"
111
- "2. Add it as a Secret named HF_TOKEN in your Space Settings\n\n"
112
- "Most relevant excerpt from your document:\n\n" + excerpt
113
  )
114
 
115
- headers = {
116
- "Content-Type": "application/json",
117
- "Authorization": "Bearer " + hf_token,
118
- }
119
- messages = [
120
- {
121
- "role": "system",
122
- "content": (
123
- "You are DocMind, an expert document analyst. "
124
- "Answer using ONLY the provided document context. "
125
- "Be concise and cite specific details. "
126
- "If the answer is not in the context, say so clearly."
127
- ),
128
- },
129
- {
130
- "role": "user",
131
- "content": "Document context:\n" + context + "\n\nQuestion: " + question,
132
- },
133
- ]
134
 
135
  last_error = ""
136
  for model_id in CANDIDATE_MODELS:
137
  try:
138
- url = (
139
- "https://router.huggingface.co/hf-inference/models/"
140
- + model_id
141
- + "/v1/chat/completions"
142
- )
143
- resp = requests.post(
144
- url,
145
- headers=headers,
146
- json={
147
- "model": model_id,
148
- "messages": messages,
149
- "max_tokens": 512,
150
- "temperature": 0.2,
151
- },
152
  timeout=60,
153
  )
154
- if resp.status_code == 200:
155
- answer = resp.json()["choices"][0]["message"]["content"].strip()
156
- if answer:
157
- return answer
158
- else:
159
- last_error = str(resp.status_code) + ": " + resp.text[:200]
 
 
 
 
 
160
  except Exception as e:
161
  last_error = str(e)
162
  continue
163
 
 
164
  excerpt = extract_best(question, context)
165
  return (
166
- "LLM models unavailable - showing most relevant excerpt:\n\n"
167
  + excerpt
168
- + "\n\nLast error: " + last_error
169
  )
170
 
171
 
@@ -179,8 +164,9 @@ def extract_best(question: str, context: str) -> str:
179
  if score > best_score:
180
  best_score = score
181
  best_chunk = chunk.strip()
182
- excerpt = best_chunk[:600] + ("..." if len(best_chunk) > 600 else "")
183
- return excerpt or "No relevant content found."
 
184
 
185
 
186
  def get_suffix(name: str) -> str:
 
1
  """
2
  RAG Engine
3
  Embeddings : sentence-transformers/all-MiniLM-L6-v2
4
+ Vector DB : ChromaDB (local)
5
+ LLM : HuggingFace InferenceClient (official library, auto-routing)
6
  Chunking : Recursive character splitter with overlap
7
  """
8
 
9
  import os
10
  import re
 
11
  import tempfile
12
  from typing import Tuple, List
13
 
 
14
  from chromadb.config import Settings
15
  from langchain.text_splitter import RecursiveCharacterTextSplitter
16
  from langchain_community.embeddings import HuggingFaceEmbeddings
 
25
  COLLECTION_NAME = "docmind_collection"
26
  CHROMA_DIR = "./chroma_db"
27
 
28
+ # Models to try in order (all free with HF token)
29
  CANDIDATE_MODELS = [
30
  "mistralai/Mistral-7B-Instruct-v0.3",
31
  "microsoft/Phi-3.5-mini-instruct",
 
32
  "HuggingFaceH4/zephyr-7b-beta",
33
+ "google/gemma-2-2b-it",
34
  ]
35
 
36
 
 
104
  if not hf_token:
105
  excerpt = extract_best(question, context)
106
  return (
107
+ "HF_TOKEN not set. Add it as a Secret in your Space Settings.\n\n"
108
+ "Best matching excerpt:\n\n" + excerpt
 
 
109
  )
110
 
111
+ # Use the official huggingface_hub InferenceClient
112
+ try:
113
+ from huggingface_hub import InferenceClient
114
+ except ImportError:
115
+ return "huggingface_hub not installed. Check requirements.txt."
116
+
117
+ system_prompt = (
118
+ "You are DocMind, an expert document analyst. "
119
+ "Answer using ONLY the provided document context. "
120
+ "Be concise and accurate. "
121
+ "If the answer is not in the context, say so clearly."
122
+ )
123
+ user_message = "Document context:\n" + context + "\n\nQuestion: " + question
 
 
 
 
 
 
124
 
125
  last_error = ""
126
  for model_id in CANDIDATE_MODELS:
127
  try:
128
+ client = InferenceClient(
129
+ model=model_id,
130
+ token=hf_token,
 
 
 
 
 
 
 
 
 
 
 
131
  timeout=60,
132
  )
133
+ result = client.chat_completion(
134
+ messages=[
135
+ {"role": "system", "content": system_prompt},
136
+ {"role": "user", "content": user_message},
137
+ ],
138
+ max_tokens=512,
139
+ temperature=0.2,
140
+ )
141
+ answer = result.choices[0].message.content.strip()
142
+ if answer:
143
+ return answer
144
  except Exception as e:
145
  last_error = str(e)
146
  continue
147
 
148
+ # All models failed — use extractive fallback
149
  excerpt = extract_best(question, context)
150
  return (
151
+ "AI answer unavailable. Here is the most relevant part of your document:\n\n"
152
  + excerpt
153
+ + "\n\n(Error: " + last_error + ")"
154
  )
155
 
156
 
 
164
  if score > best_score:
165
  best_score = score
166
  best_chunk = chunk.strip()
167
+ if not best_chunk:
168
+ return "No relevant content found."
169
+ return best_chunk[:600] + ("..." if len(best_chunk) > 600 else "")
170
 
171
 
172
  def get_suffix(name: str) -> str: