prithvi1029 committed on
Commit
572a56e
·
verified ·
1 Parent(s): 5d73db8

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +53 -31
app.py CHANGED
@@ -14,10 +14,13 @@ from huggingface_hub import InferenceClient
14
  # -----------------------------
15
  HF_TOKEN = (os.getenv("HUGGINGFACEHUB_API_TOKEN") or os.getenv("HF_TOKEN") or "").strip()
16
 
17
- HF_LLM_MODEL = os.getenv("HF_LLM_MODEL", "mistralai/Mistral-7B-Instruct-v0.3")
 
18
 
19
- # Optional: set HF_PROVIDER="together" in Space secrets if you want Together
20
- HF_PROVIDER = (os.getenv("HF_PROVIDER") or "").strip() or None
 
 
21
 
22
  EMBED_MODEL_NAME = os.getenv("EMBED_MODEL", "sentence-transformers/all-MiniLM-L6-v2")
23
  TOP_K = 4
@@ -75,16 +78,27 @@ def retrieve(query, embedder, index, chunks, k=TOP_K):
75
  return hits
76
 
77
 
78
- def hf_generate(client: InferenceClient, prompt: str) -> str:
79
  """
80
- Use chat_completion (conversational) because Together does not support
81
- text_generation for mistralai/Mistral-7B-Instruct-v0.3.
82
  """
 
 
 
 
 
 
 
 
 
 
 
 
83
  resp = client.chat_completion(
84
  model=HF_LLM_MODEL,
85
  messages=[
86
- {"role": "system", "content": "You are a helpful assistant. Answer using ONLY the provided context."},
87
- {"role": "user", "content": prompt},
88
  ],
89
  max_tokens=450,
90
  temperature=0.2,
@@ -105,11 +119,14 @@ def on_upload(pdf_path):
105
 
106
  text = pdf_to_text(pdf_path)
107
  if not text.strip():
108
- return None, None, "Could not extract text from this PDF (it may be scanned). Try a text-based PDF."
 
 
 
109
 
110
  chunks = chunk_text(text)
111
  if len(chunks) < 2:
112
- return None, None, "Not enough extractable text to build RAG index."
113
 
114
  index, _ = build_faiss_index(chunks, embedder)
115
  return index, chunks, f"βœ… Indexed {len(chunks)} chunks. Now ask a question."
@@ -123,30 +140,35 @@ def answer_question(index, chunks, question):
123
 
124
  if not HF_TOKEN:
125
  return (
126
- "HF token not found. Go to Space β†’ Settings β†’ Variables and secrets β†’ "
127
- "add Secret named HUGGINGFACEHUB_API_TOKEN, then Restart Space."
 
 
 
128
  )
129
 
 
130
  hits = retrieve(question, embedder, index, chunks, k=TOP_K)
131
- context = "\n\n".join([f"[{i+1}] {h[1]}" for i, h in enumerate(hits)])
132
-
133
- prompt = f"""Answer using ONLY the context.
134
- If the answer is not in the context, say: "I don't know from the provided document."
135
-
136
- Question: {question}
137
-
138
- Context:
139
- {context}
140
 
141
- Answer:"""
142
 
143
- # Create client (provider optional)
144
- if HF_PROVIDER:
145
- client = InferenceClient(provider=HF_PROVIDER, token=HF_TOKEN)
146
- else:
147
- client = InferenceClient(token=HF_TOKEN)
148
 
149
- ans = hf_generate(client, prompt)
 
 
 
 
 
 
 
 
 
 
 
150
 
151
  sources = "\n\n".join(
152
  [f"**Source {i+1} (score={hits[i][0]:.3f})**\n{hits[i][1][:600]}..." for i in range(len(hits))]
@@ -161,9 +183,9 @@ Answer:"""
161
  with gr.Blocks(title="Agentic Document Intelligence (HF RAG)") as demo:
162
  gr.Markdown(
163
  "# πŸ“„ Agentic Document Intelligence\n"
164
- "Upload a PDF and ask questions (RAG) β€” using Hugging Face Inference API.\n\n"
165
- "**If using Together:** set Space secret `HF_PROVIDER=together`.\n"
166
- "**Token tip:** ensure HF token has no trailing newline."
167
  )
168
 
169
  pdf = gr.File(label="Upload PDF", type="filepath")
 
14
  # -----------------------------
15
  HF_TOKEN = (os.getenv("HUGGINGFACEHUB_API_TOKEN") or os.getenv("HF_TOKEN") or "").strip()
16
 
17
+ # IMPORTANT: force HF's own inference provider so it DOES NOT route via Together
18
+ HF_PROVIDER = "hf-inference"
19
 
20
+ # Pick a model that works with HF Inference.
21
+ # If this model is not available on hf-inference for your account/region,
22
+ # change it to another instruct/chat model you have access to.
23
+ HF_LLM_MODEL = os.getenv("HF_LLM_MODEL", "mistralai/Mistral-7B-Instruct-v0.3")
24
 
25
  EMBED_MODEL_NAME = os.getenv("EMBED_MODEL", "sentence-transformers/all-MiniLM-L6-v2")
26
  TOP_K = 4
 
78
  return hits
79
 
80
 
81
+ def hf_generate(client: InferenceClient, question: str, context: str) -> str:
82
  """
83
+ Use chat_completion, but FORCE provider=hf-inference so it won't route to Together.
 
84
  """
85
+ system = (
86
+ "You are a helpful assistant. Answer using ONLY the provided context from the document. "
87
+ "If the answer is not in the context, say: \"I don't know from the provided document.\""
88
+ )
89
+
90
+ user = f"""Question: {question}
91
+
92
+ Context:
93
+ {context}
94
+
95
+ Answer:"""
96
+
97
  resp = client.chat_completion(
98
  model=HF_LLM_MODEL,
99
  messages=[
100
+ {"role": "system", "content": system},
101
+ {"role": "user", "content": user},
102
  ],
103
  max_tokens=450,
104
  temperature=0.2,
 
119
 
120
  text = pdf_to_text(pdf_path)
121
  if not text.strip():
122
+ return None, None, (
123
+ "Could not extract text from this PDF (it may be scanned / image-only). "
124
+ "Try a text-based PDF or run OCR before uploading."
125
+ )
126
 
127
  chunks = chunk_text(text)
128
  if len(chunks) < 2:
129
+ return None, None, "Not enough extractable text to build the RAG index."
130
 
131
  index, _ = build_faiss_index(chunks, embedder)
132
  return index, chunks, f"βœ… Indexed {len(chunks)} chunks. Now ask a question."
 
140
 
141
  if not HF_TOKEN:
142
  return (
143
+ "❌ HF token not found.\n\n"
144
+ "Go to Space β†’ Settings β†’ Variables and secrets β†’ New secret\n"
145
+ "Name: `HUGGINGFACEHUB_API_TOKEN`\n"
146
+ "Value: your `hf_...` token\n"
147
+ "Then Restart the Space."
148
  )
149
 
150
+ # Retrieve context
151
  hits = retrieve(question, embedder, index, chunks, k=TOP_K)
152
+ if not hits:
153
+ return "No relevant chunks retrieved from the PDF. Try a different question."
 
 
 
 
 
 
 
154
 
155
+ context = "\n\n".join([f"[{i+1}] {h[1]}" for i, h in enumerate(hits)])
156
 
157
+ # IMPORTANT: force hf-inference provider (NOT Together)
158
+ client = InferenceClient(provider=HF_PROVIDER, token=HF_TOKEN)
 
 
 
159
 
160
+ try:
161
+ ans = hf_generate(client, question=question, context=context)
162
+ except Exception as e:
163
+ # Show clean error instead of crashing
164
+ return (
165
+ "❌ LLM call failed.\n\n"
166
+ f"**Error:** `{type(e).__name__}: {str(e)}`\n\n"
167
+ "βœ… Fix tips:\n"
168
+ "- Ensure your secret `HUGGINGFACEHUB_API_TOKEN` is saved correctly (no newline).\n"
169
+ "- If you still see `router.huggingface.co/together/...` in logs, you are not forcing hf-inference.\n"
170
+ "- Try changing `HF_LLM_MODEL` to a model available to your account on HF Inference.\n"
171
+ )
172
 
173
  sources = "\n\n".join(
174
  [f"**Source {i+1} (score={hits[i][0]:.3f})**\n{hits[i][1][:600]}..." for i in range(len(hits))]
 
183
  with gr.Blocks(title="Agentic Document Intelligence (HF RAG)") as demo:
184
  gr.Markdown(
185
  "# πŸ“„ Agentic Document Intelligence\n"
186
+ "Upload a PDF and ask questions (RAG).\n\n"
187
+ "**Important:** This app forces `hf-inference` so it does NOT use Together.\n"
188
+ "If your PDF is scanned (image-only), text extraction will fail unless OCR is used."
189
  )
190
 
191
  pdf = gr.File(label="Upload PDF", type="filepath")