Update app.py
app.py CHANGED
```diff
@@ -13,9 +13,16 @@ from huggingface_hub import InferenceClient
 # Config
 # -----------------------------
 HF_TOKEN = os.getenv("HUGGINGFACEHUB_API_TOKEN") or os.getenv("HF_TOKEN")
-
+
+# LLM (keep same default, but we will call it via chat_completion, not text_generation)
 HF_LLM_MODEL = os.getenv("HF_LLM_MODEL", "mistralai/Mistral-7B-Instruct-v0.3")
 
+# IMPORTANT:
+# If you are explicitly using Together as a provider, set this variable in Space secrets:
+# HF_PROVIDER="together"
+# If you leave it empty, it will use Hugging Face default provider.
+HF_PROVIDER = os.getenv("HF_PROVIDER", "").strip() or None
+
 EMBED_MODEL_NAME = os.getenv("EMBED_MODEL", "sentence-transformers/all-MiniLM-L6-v2")
 TOP_K = 4
 
```
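A note on the new `HF_PROVIDER` line: the `.strip() or None` idiom means an unset, empty, or whitespace-only secret falls through to `None`, i.e. the Hugging Face default provider. A standalone sketch of that behavior (illustration only, not part of the commit):

```python
import os

# Hypothetical secret values for illustration.
for raw in (None, "", "   ", "together"):
    if raw is None:
        os.environ.pop("HF_PROVIDER", None)
    else:
        os.environ["HF_PROVIDER"] = raw
    provider = os.getenv("HF_PROVIDER", "").strip() or None
    print(repr(raw), "->", repr(provider))
# None, "", "   " all resolve to None; "together" stays "together".
```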
```diff
@@ -27,6 +34,7 @@ def clean_text(s: str) -> str:
     s = re.sub(r"\s+", " ", s)
     return s.strip()
 
+
 def chunk_text(text: str, chunk_size=900, overlap=150):
     chunks = []
     start = 0
@@ -41,6 +49,7 @@ def chunk_text(text: str, chunk_size=900, overlap=150):
             break
     return [c for c in (clean_text(x) for x in chunks) if len(c) > 30]
 
+
 def pdf_to_text(pdf_path: str) -> str:
     reader = PdfReader(pdf_path)
     pages = []
@@ -50,6 +59,7 @@ def pdf_to_text(pdf_path: str) -> str:
             pages.append(t)
     return "\n".join(pages)
 
+
 def build_faiss_index(chunks, embedder):
     vectors = embedder.encode(chunks, convert_to_numpy=True, normalize_embeddings=True)
     dim = vectors.shape[1]
@@ -57,6 +67,7 @@ def build_faiss_index(chunks, embedder):
     index.add(vectors.astype(np.float32))
     return index, vectors
 
+
 def retrieve(query, embedder, index, chunks, k=TOP_K):
     qv = embedder.encode([query], convert_to_numpy=True, normalize_embeddings=True).astype(np.float32)
     scores, ids = index.search(qv, k)
```
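These hunks only add blank lines, but they pass over the retrieval core: `encode(..., normalize_embeddings=True)` plus an inner-product FAISS index is the standard cosine-similarity setup, since inner product equals cosine on unit vectors. A self-contained sketch (random vectors stand in for sentence embeddings; `IndexFlatIP` is assumed, as the normalization suggests):

```python
import numpy as np
import faiss

rng = np.random.default_rng(0)
vectors = rng.normal(size=(8, 4)).astype(np.float32)
vectors /= np.linalg.norm(vectors, axis=1, keepdims=True)  # unit length, like normalize_embeddings=True

index = faiss.IndexFlatIP(vectors.shape[1])  # inner product == cosine on unit vectors
index.add(vectors)

scores, ids = index.search(vectors[:1], 3)   # query with a known vector
print(ids[0], scores[0])                     # best hit is the vector itself, score ~1.0
```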
```diff
@@ -67,16 +78,24 @@ def retrieve(query, embedder, index, chunks, k=TOP_K):
         hits.append((float(score), chunks[int(idx)]))
     return hits
 
+
 def hf_generate(client: InferenceClient, prompt: str) -> str:
-
-
-
-
+    """
+    FIX:
+    Together provider doesn't support `text_generation` for this model.
+    Use chat_completion (conversational) instead.
+    """
+    resp = client.chat_completion(
+        model=HF_LLM_MODEL,
+        messages=[
+            {"role": "system", "content": "You are a helpful assistant. Answer using ONLY the provided context."},
+            {"role": "user", "content": prompt},
+        ],
+        max_tokens=450,
         temperature=0.2,
         top_p=0.9,
-        repetition_penalty=1.08,
     )
-    return
+    return resp.choices[0].message.content.strip()
 
 
 # -----------------------------
```
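The `chat_completion` call above matches the documented `huggingface_hub.InferenceClient` API. If you want to stay robust against providers that reject one task or the other, a fallback wrapper is one option; a sketch under that assumption (not what this commit ships):

```python
from huggingface_hub import InferenceClient

def generate_with_fallback(client: InferenceClient, prompt: str, model: str) -> str:
    """Try the conversational task first, fall back to raw text_generation."""
    try:
        resp = client.chat_completion(
            model=model,
            messages=[{"role": "user", "content": prompt}],
            max_tokens=450,
        )
        return resp.choices[0].message.content.strip()
    except Exception:
        # text_generation returns a plain string when streaming/details are off
        return client.text_generation(prompt, model=model, max_new_tokens=450).strip()
```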
```diff
@@ -84,6 +103,7 @@ def hf_generate(client: InferenceClient, prompt: str) -> str:
 # -----------------------------
 embedder = SentenceTransformer(EMBED_MODEL_NAME)
 
+
 def on_upload(pdf_path):
     if not pdf_path:
         return None, None, "Please upload a PDF."
```
```diff
@@ -99,9 +119,11 @@ def on_upload(pdf_path):
     index, _ = build_faiss_index(chunks, embedder)
     return index, chunks, f"✅ Indexed {len(chunks)} chunks. Now ask a question."
 
+
 def answer_question(index, chunks, question):
+    # FIX: gate on index/chunks, NOT on the original pdf file
     if index is None or chunks is None:
-        return "Upload a PDF first."
+        return "Upload and index a PDF first."
     if not question or not question.strip():
         return "Type a question."
 
```
```diff
@@ -114,8 +136,8 @@ def answer_question(index, chunks, question):
     hits = retrieve(question, embedder, index, chunks, k=TOP_K)
     context = "\n\n".join([f"[{i+1}] {h[1]}" for i, h in enumerate(hits)])
 
-    prompt = f"""
-If the answer is not in the context, say "I don't know from the provided document."
+    prompt = f"""Answer using ONLY the context.
+If the answer is not in the context, say: "I don't know from the provided document."
 
 Question: {question}
 
```
```diff
@@ -124,10 +146,18 @@ Context:
 
 Answer:"""
 
-
+    # If HF_PROVIDER is set to "together", this will route to Together.
+    # If not set, it uses Hugging Face default provider.
+    if HF_PROVIDER:
+        client = InferenceClient(provider=HF_PROVIDER, token=HF_TOKEN)
+    else:
+        client = InferenceClient(token=HF_TOKEN)
+
     ans = hf_generate(client, prompt)
 
-    sources = "\n\n".join(
+    sources = "\n\n".join(
+        [f"**Source {i+1} (score={hits[i][0]:.3f})**\n{hits[i][1][:600]}..." for i in range(len(hits))]
+    )
 
     return f"### Answer\n{ans}\n\n---\n### Retrieved Sources\n{sources}"
 
```
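One design note: the client is rebuilt on every question. That is cheap, but if you prefer to construct it once, a cached factory works too; a possible variant (a suggestion, not part of the commit):

```python
import os
from functools import lru_cache
from huggingface_hub import InferenceClient

HF_TOKEN = os.getenv("HUGGINGFACEHUB_API_TOKEN") or os.getenv("HF_TOKEN")
HF_PROVIDER = os.getenv("HF_PROVIDER", "").strip() or None

@lru_cache(maxsize=1)
def get_client() -> InferenceClient:
    # Reuse one client across requests; provider=None means the HF default.
    if HF_PROVIDER:
        return InferenceClient(provider=HF_PROVIDER, token=HF_TOKEN)
    return InferenceClient(token=HF_TOKEN)
```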
```diff
@@ -136,7 +166,11 @@ Answer:"""
 # UI
 # -----------------------------
 with gr.Blocks(title="Agentic Document Intelligence (HF RAG)") as demo:
-    gr.Markdown(
+    gr.Markdown(
+        "# 📄 Agentic Document Intelligence\n"
+        "Upload a PDF and ask questions (RAG) – using Hugging Face Inference API.\n\n"
+        "**Tip:** If you use Together as a provider, set Space secret `HF_PROVIDER=together`."
+    )
 
     pdf = gr.File(label="Upload PDF", type="filepath")
     status = gr.Markdown()
```
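The diff ends before the event wiring, so for orientation only: a typical way these pieces connect, with the index and chunks held in `gr.State` (hypothetical wiring with stub functions; the real `on_upload`/`answer_question` live in app.py):

```python
import gradio as gr

# Stubs standing in for the app's real functions (sketch only).
def on_upload(pdf_path):
    return "fake-index", ["chunk"], "Indexed 1 chunk."

def answer_question(index, chunks, question):
    return "..." if index else "Upload and index a PDF first."

with gr.Blocks() as demo:
    index_state = gr.State(None)    # FAISS index kept in per-session state
    chunks_state = gr.State(None)   # parallel list of text chunks

    pdf = gr.File(label="Upload PDF", type="filepath")
    status = gr.Markdown()
    question = gr.Textbox(label="Question")
    answer = gr.Markdown()

    # on_upload returns (index, chunks, status_message), filling both states
    pdf.upload(on_upload, inputs=pdf, outputs=[index_state, chunks_state, status])
    # answer_question gates on index/chunks, matching the fix in this commit
    question.submit(answer_question, inputs=[index_state, chunks_state, question], outputs=answer)
```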