prithvi1029 committed on
Commit 889af78 · verified · 1 Parent(s): 572a56e

Update app.py

Files changed (1):
  1. app.py +59 -69

app.py CHANGED
@@ -12,18 +12,21 @@ from huggingface_hub import InferenceClient
 # -----------------------------
 # Config
 # -----------------------------
-HF_TOKEN = (os.getenv("HUGGINGFACEHUB_API_TOKEN") or os.getenv("HF_TOKEN") or "").strip()
+# IMPORTANT: strip() removes accidental newline in token (common issue in Secrets)
+HF_TOKEN = (
+    os.getenv("HUGGINGFACEHUB_API_TOKEN")
+    or os.getenv("HUGGINGFACEHUB_API_TOKEN".replace("-", "_"))  # just in case
+    or os.getenv("HF_TOKEN")
+    or ""
+).strip()
 
-# IMPORTANT: force HF's own inference provider so it DOES NOT route via Together
-HF_PROVIDER = "hf-inference"
+# Pick a model that is available to you on HF Inference.
+# If mistralai/Mistral-7B-Instruct-v0.3 fails, set this in Space Variables:
+#   HF_LLM_MODEL = "HuggingFaceH4/zephyr-7b-beta"  (example)
+HF_LLM_MODEL = os.getenv("HF_LLM_MODEL", "mistralai/Mistral-7B-Instruct-v0.3").strip()
 
-# Pick a model that works with HF Inference.
-# If this model is not available on hf-inference for your account/region,
-# change it to another instruct/chat model you have access to.
-HF_LLM_MODEL = os.getenv("HF_LLM_MODEL", "mistralai/Mistral-7B-Instruct-v0.3")
-
-EMBED_MODEL_NAME = os.getenv("EMBED_MODEL", "sentence-transformers/all-MiniLM-L6-v2")
-TOP_K = 4
+EMBED_MODEL_NAME = os.getenv("EMBED_MODEL", "sentence-transformers/all-MiniLM-L6-v2").strip()
+TOP_K = int(os.getenv("TOP_K", "4"))
 
 
 # -----------------------------
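One detail worth flagging in the new token lookup: `"HUGGINGFACEHUB_API_TOKEN".replace("-", "_")` returns the same string, since the name contains no hyphen, so the "just in case" branch adds no extra coverage. A minimal sketch of an explicit fallback chain with the same behavior (the helper name is hypothetical):

```python
import os

def read_token(*names: str) -> str:
    """Return the first non-empty env var, stripped of stray whitespace/newlines."""
    for name in names:
        value = (os.getenv(name) or "").strip()
        if value:
            return value
    return ""

# Same candidates the commit checks, minus the no-op .replace() variant.
HF_TOKEN = read_token("HUGGINGFACEHUB_API_TOKEN", "HF_TOKEN")
```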
@@ -41,9 +44,7 @@ def chunk_text(text: str, chunk_size=900, overlap=150):
     while start < n:
         end = min(n, start + chunk_size)
         chunks.append(text[start:end])
-        start = end - overlap
-        if start < 0:
-            start = 0
+        start = max(0, end - overlap)
         if end == n:
             break
     return [c for c in (clean_text(x) for x in chunks) if len(c) > 30]
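The rewritten `start = max(0, end - overlap)` collapses the old three-line clamp into one expression. A standalone sketch of the windowing it produces, on a toy input and without the `clean_text` filtering:

```python
def chunk_text(text: str, chunk_size=900, overlap=150):
    """Fixed-size windows that overlap by `overlap` characters."""
    chunks, start, n = [], 0, len(text)
    while start < n:
        end = min(n, start + chunk_size)
        chunks.append(text[start:end])
        start = max(0, end - overlap)  # clamp only matters when end < overlap
        if end == n:
            break
    return chunks

print([len(c) for c in chunk_text("x" * 2000)])  # [900, 900, 500]: steps of 750
```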
@@ -64,7 +65,7 @@ def build_faiss_index(chunks, embedder):
     dim = vectors.shape[1]
     index = faiss.IndexFlatIP(dim)  # cosine similarity since normalized
     index.add(vectors.astype(np.float32))
-    return index, vectors
+    return index
 
 
 def retrieve(query, embedder, index, chunks, k=TOP_K):
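The `# cosine similarity since normalized` comment is the key invariant here: `IndexFlatIP` scores by raw inner product, which equals cosine similarity only for unit-length vectors (the embedder is presumably called with normalization enabled elsewhere in app.py). A minimal sketch with random unit vectors:

```python
import faiss
import numpy as np

rng = np.random.default_rng(0)
vecs = rng.random((8, 384), dtype=np.float32)        # 384 = all-MiniLM-L6-v2 dim
vecs /= np.linalg.norm(vecs, axis=1, keepdims=True)  # unit-normalize each row

index = faiss.IndexFlatIP(vecs.shape[1])  # inner product == cosine on unit vectors
index.add(vecs)

scores, ids = index.search(vecs[:1], 4)  # top-4 matches for the first vector
print(ids[0], scores[0])                 # the query itself ranks first, score ~1.0
```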
@@ -78,33 +79,34 @@ def retrieve(query, embedder, index, chunks, k=TOP_K):
     return hits
 
 
-def hf_generate(client: InferenceClient, question: str, context: str) -> str:
+def hf_generate_text(prompt: str) -> str:
     """
-    Use chat_completion, but FORCE provider=hf-inference so it won't route to Together.
+    Force HF Inference (NOT Together).
+    Use text_generation endpoint (NOT chat_completion) to avoid "conversational" task errors.
     """
-    system = (
-        "You are a helpful assistant. Answer using ONLY the provided context from the document. "
-        "If the answer is not in the context, say: \"I don't know from the provided document.\""
-    )
+    client = InferenceClient(provider="hf-inference", token=HF_TOKEN)
 
-    user = f"""Question: {question}
-
-Context:
-{context}
-
-Answer:"""
-
-    resp = client.chat_completion(
-        model=HF_LLM_MODEL,
-        messages=[
-            {"role": "system", "content": system},
-            {"role": "user", "content": user},
-        ],
-        max_tokens=450,
-        temperature=0.2,
-        top_p=0.9,
-    )
-    return resp.choices[0].message.content.strip()
+    try:
+        out = client.text_generation(
+            model=HF_LLM_MODEL,
+            prompt=prompt,
+            max_new_tokens=450,
+            temperature=0.2,
+            top_p=0.9,
+            repetition_penalty=1.08,
+            return_full_text=False,
+        )
+        return (out or "").strip()
+    except Exception as e:
+        return (
+            "LLM call failed.\n\n"
+            f"**Model:** `{HF_LLM_MODEL}`\n"
+            f"**Error:** `{type(e).__name__}: {e}`\n\n"
+            "✅ Fix:\n"
+            "1) Go to **Space → Settings → Variables and secrets**\n"
+            "2) Add/Change a **Variable** named `HF_LLM_MODEL` to a model you can access on HF Inference.\n"
+            "3) Restart Space.\n"
+        )
 
 
 # -----------------------------
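The switch from `chat_completion` to `text_generation` is the substantive fix: `text_generation` hits the raw generation endpoint, so models not tagged for the conversational task still work. A standalone sketch of the same call path, assuming a `huggingface_hub` version recent enough to accept the `provider` argument and a model your token can access:

```python
import os
from huggingface_hub import InferenceClient

client = InferenceClient(
    provider="hf-inference",  # pin HF's own provider; no routing to Together
    token=os.environ["HUGGINGFACEHUB_API_TOKEN"],
)
completion = client.text_generation(
    model="mistralai/Mistral-7B-Instruct-v0.3",
    prompt="Question: What does RAG stand for?\n\nAnswer:",
    max_new_tokens=64,
    temperature=0.2,
    return_full_text=False,  # return only the completion, not the echoed prompt
)
print(completion)
```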
@@ -119,16 +121,13 @@ def on_upload(pdf_path):
 
     text = pdf_to_text(pdf_path)
     if not text.strip():
-        return None, None, (
-            "Could not extract text from this PDF (it may be scanned / image-only). "
-            "Try a text-based PDF or run OCR before uploading."
-        )
+        return None, None, "Could not extract text (scanned PDF). Use a text-based PDF or add OCR."
 
     chunks = chunk_text(text)
     if len(chunks) < 2:
-        return None, None, "Not enough extractable text to build the RAG index."
+        return None, None, "Not enough text to build RAG index."
 
-    index, _ = build_faiss_index(chunks, embedder)
+    index = build_faiss_index(chunks, embedder)
     return index, chunks, f"✅ Indexed {len(chunks)} chunks. Now ask a question."
 
 
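Both failure messages in `on_upload` point scanned PDFs at OCR. A hedged sketch of that preprocessing step using `pdf2image` and `pytesseract` (neither ships with this Space; the two libraries, their poppler/tesseract system binaries, and the helper name are all assumptions):

```python
from pdf2image import convert_from_path  # requires the poppler system package
import pytesseract                       # requires the tesseract binary

def pdf_to_text_ocr(pdf_path: str) -> str:
    """Render each page to an image and OCR it; a fallback for image-only PDFs."""
    pages = convert_from_path(pdf_path)
    return "\n".join(pytesseract.image_to_string(page) for page in pages)
```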
@@ -140,35 +139,27 @@ def answer_question(index, chunks, question):
 
     if not HF_TOKEN:
         return (
-            "❌ HF token not found.\n\n"
-            "Go to Space → Settings → Variables and secrets → New secret\n"
+            "HF token not found.\n\n"
+            "Go to **Space → Settings → Variables and secrets → New secret**\n"
             "Name: `HUGGINGFACEHUB_API_TOKEN`\n"
-            "Value: your `hf_...` token\n"
-            "Then Restart the Space."
+            "Value: your hf_... token (no extra spaces/newlines)\n"
+            "Then **Restart Space**."
         )
 
-    # Retrieve context
     hits = retrieve(question, embedder, index, chunks, k=TOP_K)
-    if not hits:
-        return "No relevant chunks retrieved from the PDF. Try a different question."
-
     context = "\n\n".join([f"[{i+1}] {h[1]}" for i, h in enumerate(hits)])
 
-    # IMPORTANT: force hf-inference provider (NOT Together)
-    client = InferenceClient(provider=HF_PROVIDER, token=HF_TOKEN)
+    prompt = f"""You are a helpful assistant. Answer using ONLY the context.
+If the answer is not in the context, say: "I don't know from the provided document."
 
-    try:
-        ans = hf_generate(client, question=question, context=context)
-    except Exception as e:
-        # Show clean error instead of crashing
-        return (
-            "❌ LLM call failed.\n\n"
-            f"**Error:** `{type(e).__name__}: {str(e)}`\n\n"
-            "✅ Fix tips:\n"
-            "- Ensure your secret `HUGGINGFACEHUB_API_TOKEN` is saved correctly (no newline).\n"
-            "- If you still see `router.huggingface.co/together/...` in logs, you are not forcing hf-inference.\n"
-            "- Try changing `HF_LLM_MODEL` to a model available to your account on HF Inference.\n"
-        )
+Question: {question}
+
+Context:
+{context}
+
+Answer:"""
+
+    ans = hf_generate_text(prompt)
 
     sources = "\n\n".join(
         [f"**Source {i+1} (score={hits[i][0]:.3f})**\n{hits[i][1][:600]}..." for i in range(len(hits))]
@@ -184,8 +175,7 @@ with gr.Blocks(title="Agentic Document Intelligence (HF RAG)") as demo:
     gr.Markdown(
         "# 📄 Agentic Document Intelligence\n"
         "Upload a PDF and ask questions (RAG).\n\n"
-        "**Important:** This app forces `hf-inference` so it does NOT use Together.\n"
-        "If your PDF is scanned (image-only), text extraction will fail unless OCR is used."
+        "**Important:** This app forces `hf-inference` (so it does NOT use Together)."
    )
 
     pdf = gr.File(label="Upload PDF", type="filepath")
@@ -200,7 +190,7 @@ with gr.Blocks(title="Agentic Document Intelligence (HF RAG)") as demo:
         outputs=[index_state, chunks_state, status],
     )
 
-    question = gr.Textbox(label="Ask a question", placeholder="e.g., What is the payment term?")
+    question = gr.Textbox(label="Ask a question", placeholder="e.g., Give a summary of the PDF")
     out = gr.Markdown()
     btn = gr.Button("Run")
 
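The hunk ends just before the button is wired to its callback, which stays outside the diff. A self-contained sketch of the same Blocks wiring pattern (the lambda stands in for the real handler):

```python
import gradio as gr

with gr.Blocks() as demo:
    question = gr.Textbox(label="Ask a question")
    out = gr.Markdown()
    btn = gr.Button("Run")
    # app.py presumably passes answer_question plus its two gr.State inputs here.
    btn.click(fn=lambda q: f"You asked: {q}", inputs=question, outputs=out)

# demo.launch()  # uncomment to run locally
```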