Spaces:

prithvi1029
/

agentic-document-intelligence

Sleeping

App Files Community

prithvi1029 committed 22 days ago

Commit

2502eeb

verified ·

1 Parent(s): 889af78

Update app.py

Browse files

Files changed (1) hide show

app.py +22 -34

app.py CHANGED Viewed

@@ -12,18 +12,14 @@ from huggingface_hub import InferenceClient
 # -----------------------------
 # Config
 # -----------------------------
-# IMPORTANT: strip() removes accidental newline in token (common issue in Secrets)
 HF_TOKEN = (
     os.getenv("HUGGINGFACEHUB_API_TOKEN")
-    or os.getenv("HUGGINGFACEHUB_API_TOKEN".replace("-", "_"))  # just in case
     or os.getenv("HF_TOKEN")
     or ""
 ).strip()
-# Pick a model that is available to you on HF Inference
-# If mistralai/Mistral-7B-Instruct-v0.3 fails, set this in Space Variables:
-# HF_LLM_MODEL = "HuggingFaceH4/zephyr-7b-beta"  (example)
-HF_LLM_MODEL = os.getenv("HF_LLM_MODEL", "mistralai/Mistral-7B-Instruct-v0.3").strip()
 EMBED_MODEL_NAME = os.getenv("EMBED_MODEL", "sentence-transformers/all-MiniLM-L6-v2").strip()
 TOP_K = int(os.getenv("TOP_K", "4"))
@@ -81,14 +77,22 @@ def retrieve(query, embedder, index, chunks, k=TOP_K):
 def hf_generate_text(prompt: str) -> str:
     """
-    Force HF Inference (NOT Together).
-    Use text_generation endpoint (NOT chat_completion) to avoid "conversational" task errors.
     """
-    client = InferenceClient(provider="hf-inference", token=HF_TOKEN)
     try:
         out = client.text_generation(
-            model=HF_LLM_MODEL,
             prompt=prompt,
             max_new_tokens=450,
             temperature=0.2,
@@ -102,10 +106,11 @@ def hf_generate_text(prompt: str) -> str:
             "LLM call failed.\n\n"
             f"**Model:** `{HF_LLM_MODEL}`\n"
             f"**Error:** `{type(e).__name__}: {e}`\n\n"
-            "✅ Fix:\n"
-            "1) Go to **Space → Settings → Variables and secrets**\n"
-            "2) Add/Change a **Variable** named `HF_LLM_MODEL` to a model you can access on HF Inference.\n"
-            "3) Restart Space.\n"
         )
@@ -137,15 +142,6 @@ def answer_question(index, chunks, question):
     if not question or not question.strip():
         return "Type a question."
-    if not HF_TOKEN:
-        return (
-            "HF token not found.\n\n"
-            "Go to **Space → Settings → Variables and secrets → New secret**\n"
-            "Name: `HUGGINGFACEHUB_API_TOKEN`\n"
-            "Value: your hf_... token (no extra spaces/newlines)\n"
-            "Then **Restart Space**."
-        )
     hits = retrieve(question, embedder, index, chunks, k=TOP_K)
     context = "\n\n".join([f"[{i+1}] {h[1]}" for i, h in enumerate(hits)])
@@ -175,7 +171,7 @@ with gr.Blocks(title="Agentic Document Intelligence (HF RAG)") as demo:
     gr.Markdown(
         "# 📄 Agentic Document Intelligence\n"
         "Upload a PDF and ask questions (RAG).\n\n"
-        "**Important:** This app forces `hf-inference` (so it does NOT use Together)."
     )
     pdf = gr.File(label="Upload PDF", type="filepath")
@@ -184,20 +180,12 @@ with gr.Blocks(title="Agentic Document Intelligence (HF RAG)") as demo:
     index_state = gr.State(None)
     chunks_state = gr.State(None)
-    pdf.change(
-        fn=on_upload,
-        inputs=[pdf],
-        outputs=[index_state, chunks_state, status],
-    )
     question = gr.Textbox(label="Ask a question", placeholder="e.g., Give a summary of the PDF")
     out = gr.Markdown()
     btn = gr.Button("Run")
-    btn.click(
-        fn=answer_question,
-        inputs=[index_state, chunks_state, question],
-        outputs=[out],
-    )
 demo.launch()

 # -----------------------------
 # Config
 # -----------------------------
 HF_TOKEN = (
     os.getenv("HUGGINGFACEHUB_API_TOKEN")
+    or os.getenv("HUGGINGFACEHUB_API_TOKEN".replace("-", "_"))
     or os.getenv("HF_TOKEN")
     or ""
 ).strip()
+HF_LLM_MODEL = os.getenv("HF_LLM_MODEL", "HuggingFaceH4/zephyr-7b-beta").strip()
 EMBED_MODEL_NAME = os.getenv("EMBED_MODEL", "sentence-transformers/all-MiniLM-L6-v2").strip()
 TOP_K = int(os.getenv("TOP_K", "4"))
 def hf_generate_text(prompt: str) -> str:
     """
+    Uses NORMAL HF serverless inference (no Inference Providers router).
+    This avoids router 404 / supported-tasks errors you were getting.
     """
+    if not HF_TOKEN:
+        return (
+            "HF token not found.\n\n"
+            "Go to **Space → Settings → Variables and secrets → New secret**\n"
+            "Name: `HUGGINGFACEHUB_API_TOKEN`\n"
+            "Value: your hf_... token\n"
+            "Then restart the Space."
+        )
+    client = InferenceClient(model=HF_LLM_MODEL, token=HF_TOKEN)
     try:
         out = client.text_generation(
             prompt=prompt,
             max_new_tokens=450,
             temperature=0.2,
             "LLM call failed.\n\n"
             f"**Model:** `{HF_LLM_MODEL}`\n"
             f"**Error:** `{type(e).__name__}: {e}`\n\n"
+            "✅ Fix checklist:\n"
+            "1) Confirm `HF_LLM_MODEL` is exactly correct (copy-paste repo id).\n"
+            "2) If model is gated, open the model page and click **Agree / Request access**.\n"
+            "3) Recreate token with **Read** (usually enough) and ensure it’s pasted correctly in Space secrets.\n"
+            "4) Restart Space.\n"
         )
     if not question or not question.strip():
         return "Type a question."
     hits = retrieve(question, embedder, index, chunks, k=TOP_K)
     context = "\n\n".join([f"[{i+1}] {h[1]}" for i, h in enumerate(hits)])
     gr.Markdown(
         "# 📄 Agentic Document Intelligence\n"
         "Upload a PDF and ask questions (RAG).\n\n"
+        f"**Model:** `{HF_LLM_MODEL}`"
     )
     pdf = gr.File(label="Upload PDF", type="filepath")
     index_state = gr.State(None)
     chunks_state = gr.State(None)
+    pdf.change(fn=on_upload, inputs=[pdf], outputs=[index_state, chunks_state, status])
     question = gr.Textbox(label="Ask a question", placeholder="e.g., Give a summary of the PDF")
     out = gr.Markdown()
     btn = gr.Button("Run")
+    btn.click(fn=answer_question, inputs=[index_state, chunks_state, question], outputs=[out])
 demo.launch()