Update app.py
app.py CHANGED
@@ -14,10 +14,13 @@ from huggingface_hub import InferenceClient
 # -----------------------------
 HF_TOKEN = (os.getenv("HUGGINGFACEHUB_API_TOKEN") or os.getenv("HF_TOKEN") or "").strip()
 
-…
+# IMPORTANT: force HF's own inference provider so it DOES NOT route via Together
+HF_PROVIDER = "hf-inference"
 
-# …
-…
+# Pick a model that works with HF Inference.
+# If this model is not available on hf-inference for your account/region,
+# change it to another instruct/chat model you have access to.
+HF_LLM_MODEL = os.getenv("HF_LLM_MODEL", "mistralai/Mistral-7B-Instruct-v0.3")
 
 EMBED_MODEL_NAME = os.getenv("EMBED_MODEL", "sentence-transformers/all-MiniLM-L6-v2")
 TOP_K = 4
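Why this hunk matters: recent huggingface_hub releases accept a provider= argument on InferenceClient, and pinning it to "hf-inference" keeps requests on Hugging Face's own backend instead of a routed third-party provider. A minimal standalone check of that behavior might look like this (a sketch, assuming your token can access the model on hf-inference):

import os
from huggingface_hub import InferenceClient

# Pin the provider exactly as the diff does; swap the model for any
# instruct/chat model your account can reach on hf-inference.
client = InferenceClient(provider="hf-inference",
                         token=os.environ["HUGGINGFACEHUB_API_TOKEN"])
resp = client.chat_completion(
    model="mistralai/Mistral-7B-Instruct-v0.3",
    messages=[{"role": "user", "content": "Reply with one short sentence."}],
    max_tokens=20,
)
print(resp.choices[0].message.content)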
@@ -75,16 +78,27 @@ def retrieve(query, embedder, index, chunks, k=TOP_K):
     return hits
 
 
-def hf_generate(client: InferenceClient, …
+def hf_generate(client: InferenceClient, question: str, context: str) -> str:
     """
-    Use chat_completion …
-    … text_generation for mistralai/Mistral-7B-Instruct-v0.3.
+    Use chat_completion, but FORCE provider=hf-inference so it won't route to Together.
     """
+    system = (
+        "You are a helpful assistant. Answer using ONLY the provided context from the document. "
+        "If the answer is not in the context, say: \"I don't know from the provided document.\""
+    )
+
+    user = f"""Question: {question}
+
+Context:
+{context}
+
+Answer:"""
+
     resp = client.chat_completion(
         model=HF_LLM_MODEL,
         messages=[
-            {"role": "system", "content": …
-            {"role": "user", "content": …
+            {"role": "system", "content": system},
+            {"role": "user", "content": user},
         ],
         max_tokens=450,
         temperature=0.2,
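The hunk cuts off at temperature=0.2, so hf_generate's return statement is not visible in this diff. chat_completion returns an OpenAI-style response object, so the tail presumably reads something like this (an assumption about code outside the hunk):

    # Assumed tail of hf_generate (not shown in the hunk above):
    # chat_completion returns choices with .message.content holding the text.
    return (resp.choices[0].message.content or "").strip()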
@@ -105,11 +119,14 @@ def on_upload(pdf_path):
 
     text = pdf_to_text(pdf_path)
     if not text.strip():
-        return None, None, …
+        return None, None, (
+            "Could not extract text from this PDF (it may be scanned / image-only). "
+            "Try a text-based PDF or run OCR before uploading."
+        )
 
     chunks = chunk_text(text)
     if len(chunks) < 2:
-        return None, None, "Not enough extractable text to build RAG index."
+        return None, None, "Not enough extractable text to build the RAG index."
 
     index, _ = build_faiss_index(chunks, embedder)
     return index, chunks, f"✅ Indexed {len(chunks)} chunks. Now ask a question."
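build_faiss_index is called here but defined outside the hunk. Given the sentence-transformers embedder configured by EMBED_MODEL_NAME, a typical implementation matching the index, _ = ... call site would be roughly this (a sketch; the real function may differ):

import faiss
import numpy as np

def build_faiss_index(chunks, embedder):
    # Embed the chunks, L2-normalize, and use an inner-product index so
    # search scores behave like cosine similarity.
    vecs = embedder.encode(chunks, convert_to_numpy=True).astype(np.float32)
    faiss.normalize_L2(vecs)
    index = faiss.IndexFlatIP(vecs.shape[1])
    index.add(vecs)
    return index, vecs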
@@ -123,30 +140,35 @@ def answer_question(index, chunks, question):
 
     if not HF_TOKEN:
         return (
-            "HF token not found …
-            "…
+            "❌ HF token not found.\n\n"
+            "Go to Space → Settings → Variables and secrets → New secret\n"
+            "Name: `HUGGINGFACEHUB_API_TOKEN`\n"
+            "Value: your `hf_...` token\n"
+            "Then Restart the Space."
         )
 
+    # Retrieve context
     hits = retrieve(question, embedder, index, chunks, k=TOP_K)
-    …
-    …
-    prompt = f"""Answer using ONLY the context.
-If the answer is not in the context, say: "I don't know from the provided document."
-
-Question: {question}
-
-Context:
-{context}
+    if not hits:
+        return "No relevant chunks retrieved from the PDF. Try a different question."
 
-…
+    context = "\n\n".join([f"[{i+1}] {h[1]}" for i, h in enumerate(hits)])
 
-    # …
-    …
-        client = InferenceClient(provider=HF_PROVIDER, token=HF_TOKEN)
-    else:
-        client = InferenceClient(token=HF_TOKEN)
+    # IMPORTANT: force hf-inference provider (NOT Together)
+    client = InferenceClient(provider=HF_PROVIDER, token=HF_TOKEN)
 
-    …
+    try:
+        ans = hf_generate(client, question=question, context=context)
+    except Exception as e:
+        # Show clean error instead of crashing
+        return (
+            "❌ LLM call failed.\n\n"
+            f"**Error:** `{type(e).__name__}: {str(e)}`\n\n"
+            "✅ Fix tips:\n"
+            "- Ensure your secret `HUGGINGFACEHUB_API_TOKEN` is saved correctly (no newline).\n"
+            "- If you still see `router.huggingface.co/together/...` in logs, you are not forcing hf-inference.\n"
+            "- Try changing `HF_LLM_MODEL` to a model available to your account on HF Inference.\n"
+        )
 
     sources = "\n\n".join(
         [f"**Source {i+1} (score={hits[i][0]:.3f})**\n{hits[i][1][:600]}..." for i in range(len(hits))]
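retrieve is also defined outside this diff, but the call sites pin down its contract: it takes (question, embedder, index, chunks, k) and returns pairs indexed as hits[i][0] (score) and hits[i][1] (chunk text). A sketch consistent with that contract and the FAISS index above:

import faiss

def retrieve(query, embedder, index, chunks, k=TOP_K):
    # Embed the query the same way the chunks were embedded, then take
    # the top-k nearest chunks as (score, text) pairs.
    q = embedder.encode([query], convert_to_numpy=True).astype("float32")
    faiss.normalize_L2(q)
    scores, ids = index.search(q, k)
    return [(float(s), chunks[j]) for s, j in zip(scores[0], ids[0]) if j >= 0]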
@@ -161,9 +183,9 @@ Answer:"""
 with gr.Blocks(title="Agentic Document Intelligence (HF RAG)") as demo:
     gr.Markdown(
         "# 📄 Agentic Document Intelligence\n"
-        "Upload a PDF and ask questions (RAG) …
-        "** …
-        "…
+        "Upload a PDF and ask questions (RAG).\n\n"
+        "**Important:** This app forces `hf-inference` so it does NOT use Together.\n"
+        "If your PDF is scanned (image-only), text extraction will fail unless OCR is used."
     )
 
     pdf = gr.File(label="Upload PDF", type="filepath")
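The UI hunk ends at the file input, so the event wiring is not shown. Since on_upload returns (index, chunks, status) and answer_question takes (index, chunks, question), the rest of the Blocks section presumably threads them through gr.State along these lines (component names here are guesses, not the file's code):

    # Hypothetical wiring; only `pdf` appears in the diff itself.
    index_state = gr.State(None)
    chunks_state = gr.State(None)
    status = gr.Markdown()
    question = gr.Textbox(label="Ask a question about the PDF")
    answer = gr.Markdown()

    pdf.upload(on_upload, inputs=pdf, outputs=[index_state, chunks_state, status])
    question.submit(answer_question,
                    inputs=[index_state, chunks_state, question],
                    outputs=answer)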