prithvi1029 committed on
Commit 889af78 · verified · 1 Parent(s): 572a56e

Update app.py

Files changed (1):
  1. app.py +59 -69

app.py CHANGED
@@ -12,18 +12,21 @@ from huggingface_hub import InferenceClient
 # -----------------------------
 # Config
 # -----------------------------
-HF_TOKEN = (os.getenv("HUGGINGFACEHUB_API_TOKEN") or os.getenv("HF_TOKEN") or "").strip()
+# IMPORTANT: strip() removes accidental newline in token (common issue in Secrets)
+HF_TOKEN = (
+    os.getenv("HUGGINGFACEHUB_API_TOKEN")
+    or os.getenv("HUGGINGFACEHUB_API_TOKEN".replace("-", "_"))  # just in case
+    or os.getenv("HF_TOKEN")
+    or ""
+).strip()
 
-# IMPORTANT: force HF's own inference provider so it DOES NOT route via Together
-HF_PROVIDER = "hf-inference"
+# Pick a model that is available to you on HF Inference.
+# If mistralai/Mistral-7B-Instruct-v0.3 fails, set this in Space Variables:
+#   HF_LLM_MODEL = "HuggingFaceH4/zephyr-7b-beta"  (example)
+HF_LLM_MODEL = os.getenv("HF_LLM_MODEL", "mistralai/Mistral-7B-Instruct-v0.3").strip()
 
-# Pick a model that works with HF Inference.
-# If this model is not available on hf-inference for your account/region,
-# change it to another instruct/chat model you have access to.
-HF_LLM_MODEL = os.getenv("HF_LLM_MODEL", "mistralai/Mistral-7B-Instruct-v0.3")
-
-EMBED_MODEL_NAME = os.getenv("EMBED_MODEL", "sentence-transformers/all-MiniLM-L6-v2")
-TOP_K = 4
+EMBED_MODEL_NAME = os.getenv("EMBED_MODEL", "sentence-transformers/all-MiniLM-L6-v2").strip()
+TOP_K = int(os.getenv("TOP_K", "4"))
 
 
 # -----------------------------
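One detail worth flagging in the new token lookup: `"HUGGINGFACEHUB_API_TOKEN".replace("-", "_")` returns the same string, since the name contains no hyphen, so the "just in case" branch adds no extra coverage. A minimal sketch of an explicit fallback chain with the same behavior (the helper name is hypothetical):

```python
import os

def read_token(*names: str) -> str:
    """Return the first non-empty env var, stripped of stray whitespace/newlines."""
    for name in names:
        value = (os.getenv(name) or "").strip()
        if value:
            return value
    return ""

# Same candidates the commit checks, minus the no-op .replace() variant.
HF_TOKEN = read_token("HUGGINGFACEHUB_API_TOKEN", "HF_TOKEN")
```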
@@ -41,9 +44,7 @@ def chunk_text(text: str, chunk_size=900, overlap=150):
     while start < n:
         end = min(n, start + chunk_size)
         chunks.append(text[start:end])
-        start = end - overlap
-        if start < 0:
-            start = 0
+        start = max(0, end - overlap)
         if end == n:
             break
     return [c for c in (clean_text(x) for x in chunks) if len(c) > 30]
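The rewritten `start = max(0, end - overlap)` collapses the old three-line clamp into one expression. A standalone sketch of the windowing it produces, on a toy input and without the `clean_text` filtering:

```python
def chunk_text(text: str, chunk_size=900, overlap=150):
    """Fixed-size windows that overlap by `overlap` characters."""
    chunks, start, n = [], 0, len(text)
    while start < n:
        end = min(n, start + chunk_size)
        chunks.append(text[start:end])
        start = max(0, end - overlap)  # clamp only matters when end < overlap
        if end == n:
            break
    return chunks

print([len(c) for c in chunk_text("x" * 2000)])  # [900, 900, 500]: steps of 750
```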
@@ -64,7 +65,7 @@ def build_faiss_index(chunks, embedder):
     dim = vectors.shape[1]
     index = faiss.IndexFlatIP(dim)  # cosine similarity since normalized
     index.add(vectors.astype(np.float32))
-    return index, vectors
+    return index
 
 
 def retrieve(query, embedder, index, chunks, k=TOP_K):
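The `# cosine similarity since normalized` comment is the key invariant here: `IndexFlatIP` scores by raw inner product, which equals cosine similarity only for unit-length vectors (the embedder is presumably called with normalization enabled elsewhere in app.py). A minimal sketch with random unit vectors:

```python
import faiss
import numpy as np

rng = np.random.default_rng(0)
vecs = rng.random((8, 384), dtype=np.float32)        # 384 = all-MiniLM-L6-v2 dim
vecs /= np.linalg.norm(vecs, axis=1, keepdims=True)  # unit-normalize each row

index = faiss.IndexFlatIP(vecs.shape[1])  # inner product == cosine on unit vectors
index.add(vecs)

scores, ids = index.search(vecs[:1], 4)  # top-4 matches for the first vector
print(ids[0], scores[0])                 # the query itself ranks first, score ~1.0
```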
@@ -78,33 +79,34 @@ def retrieve(query, embedder, index, chunks, k=TOP_K):
     return hits
 
 
-def hf_generate(client: InferenceClient, question: str, context: str) -> str:
+def hf_generate_text(prompt: str) -> str:
     """
-    Use chat_completion, but FORCE provider=hf-inference so it won't route to Together.
+    Force HF Inference (NOT Together).
+    Use text_generation endpoint (NOT chat_completion) to avoid "conversational" task errors.
     """
-    system = (
-        "You are a helpful assistant. Answer using ONLY the provided context from the document. "
-        "If the answer is not in the context, say: \"I don't know from the provided document.\""
-    )
+    client = InferenceClient(provider="hf-inference", token=HF_TOKEN)
 
-    user = f"""Question: {question}
-
-Context:
-{context}
-
-Answer:"""
-
-    resp = client.chat_completion(
-        model=HF_LLM_MODEL,
-        messages=[
-            {"role": "system", "content": system},
-            {"role": "user", "content": user},
-        ],
-        max_tokens=450,
-        temperature=0.2,
-        top_p=0.9,
-    )
-    return resp.choices[0].message.content.strip()
+    try:
+        out = client.text_generation(
+            model=HF_LLM_MODEL,
+            prompt=prompt,
+            max_new_tokens=450,
+            temperature=0.2,
+            top_p=0.9,
+            repetition_penalty=1.08,
+            return_full_text=False,
+        )
+        return (out or "").strip()
+    except Exception as e:
+        return (
+            "LLM call failed.\n\n"
+            f"**Model:** `{HF_LLM_MODEL}`\n"
+            f"**Error:** `{type(e).__name__}: {e}`\n\n"
+            "✅ Fix:\n"
+            "1) Go to **Space → Settings → Variables and secrets**\n"
+            "2) Add/Change a **Variable** named `HF_LLM_MODEL` to a model you can access on HF Inference.\n"
+            "3) Restart Space.\n"
+        )
 
 
 # -----------------------------
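The switch from `chat_completion` to `text_generation` is the substantive fix: `text_generation` hits the raw generation endpoint, so models not tagged for the conversational task still work. A standalone sketch of the same call path, assuming a `huggingface_hub` version recent enough to accept the `provider` argument and a model your token can access:

```python
import os
from huggingface_hub import InferenceClient

client = InferenceClient(
    provider="hf-inference",  # pin HF's own provider; no routing to Together
    token=os.environ["HUGGINGFACEHUB_API_TOKEN"],
)
completion = client.text_generation(
    model="mistralai/Mistral-7B-Instruct-v0.3",
    prompt="Question: What does RAG stand for?\n\nAnswer:",
    max_new_tokens=64,
    temperature=0.2,
    return_full_text=False,  # return only the completion, not the echoed prompt
)
print(completion)
```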
@@ -119,16 +121,13 @@ def on_upload(pdf_path):
 
     text = pdf_to_text(pdf_path)
     if not text.strip():
-        return None, None, (
-            "Could not extract text from this PDF (it may be scanned / image-only). "
-            "Try a text-based PDF or run OCR before uploading."
-        )
+        return None, None, "Could not extract text (scanned PDF). Use a text-based PDF or add OCR."
 
     chunks = chunk_text(text)
     if len(chunks) < 2:
-        return None, None, "Not enough extractable text to build the RAG index."
+        return None, None, "Not enough text to build RAG index."
 
-    index, _ = build_faiss_index(chunks, embedder)
+    index = build_faiss_index(chunks, embedder)
     return index, chunks, f"✅ Indexed {len(chunks)} chunks. Now ask a question."
 
 
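Both failure messages in `on_upload` point scanned PDFs at OCR. A hedged sketch of that preprocessing step using `pdf2image` and `pytesseract` (neither ships with this Space; the two libraries, their poppler/tesseract system binaries, and the helper name are all assumptions):

```python
from pdf2image import convert_from_path  # requires the poppler system package
import pytesseract                       # requires the tesseract binary

def pdf_to_text_ocr(pdf_path: str) -> str:
    """Render each page to an image and OCR it; a fallback for image-only PDFs."""
    pages = convert_from_path(pdf_path)
    return "\n".join(pytesseract.image_to_string(page) for page in pages)
```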
@@ -140,35 +139,27 @@ def answer_question(index, chunks, question):
 
     if not HF_TOKEN:
         return (
-            "❌ HF token not found.\n\n"
-            "Go to Space → Settings → Variables and secrets → New secret\n"
+            "HF token not found.\n\n"
+            "Go to **Space → Settings → Variables and secrets → New secret**\n"
             "Name: `HUGGINGFACEHUB_API_TOKEN`\n"
-            "Value: your `hf_...` token\n"
-            "Then Restart the Space."
+            "Value: your hf_... token (no extra spaces/newlines)\n"
+            "Then **Restart Space**."
         )
 
-    # Retrieve context
     hits = retrieve(question, embedder, index, chunks, k=TOP_K)
-    if not hits:
-        return "No relevant chunks retrieved from the PDF. Try a different question."
-
     context = "\n\n".join([f"[{i+1}] {h[1]}" for i, h in enumerate(hits)])
 
-    # IMPORTANT: force hf-inference provider (NOT Together)
-    client = InferenceClient(provider=HF_PROVIDER, token=HF_TOKEN)
+    prompt = f"""You are a helpful assistant. Answer using ONLY the context.
+If the answer is not in the context, say: "I don't know from the provided document."
 
-    try:
-        ans = hf_generate(client, question=question, context=context)
-    except Exception as e:
-        # Show clean error instead of crashing
-        return (
-            "❌ LLM call failed.\n\n"
-            f"**Error:** `{type(e).__name__}: {str(e)}`\n\n"
-            "✅ Fix tips:\n"
-            "- Ensure your secret `HUGGINGFACEHUB_API_TOKEN` is saved correctly (no newline).\n"
-            "- If you still see `router.huggingface.co/together/...` in logs, you are not forcing hf-inference.\n"
-            "- Try changing `HF_LLM_MODEL` to a model available to your account on HF Inference.\n"
-        )
+Question: {question}
+
+Context:
+{context}
+
+Answer:"""
+
+    ans = hf_generate_text(prompt)
 
     sources = "\n\n".join(
         [f"**Source {i+1} (score={hits[i][0]:.3f})**\n{hits[i][1][:600]}..." for i in range(len(hits))]
@@ -184,8 +175,7 @@ with gr.Blocks(title="Agentic Document Intelligence (HF RAG)") as demo:
     gr.Markdown(
         "# 📄 Agentic Document Intelligence\n"
         "Upload a PDF and ask questions (RAG).\n\n"
-        "**Important:** This app forces `hf-inference` so it does NOT use Together.\n"
-        "If your PDF is scanned (image-only), text extraction will fail unless OCR is used."
+        "**Important:** This app forces `hf-inference` (so it does NOT use Together)."
    )
 
     pdf = gr.File(label="Upload PDF", type="filepath")
@@ -200,7 +190,7 @@ with gr.Blocks(title="Agentic Document Intelligence (HF RAG)") as demo:
         outputs=[index_state, chunks_state, status],
     )
 
-    question = gr.Textbox(label="Ask a question", placeholder="e.g., What is the payment term?")
+    question = gr.Textbox(label="Ask a question", placeholder="e.g., Give a summary of the PDF")
     out = gr.Markdown()
     btn = gr.Button("Run")
 
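The hunk ends just before the button is wired to its callback, which stays outside the diff. A self-contained sketch of the same Blocks wiring pattern (the lambda stands in for the real handler):

```python
import gradio as gr

with gr.Blocks() as demo:
    question = gr.Textbox(label="Ask a question")
    out = gr.Markdown()
    btn = gr.Button("Run")
    # app.py presumably passes answer_question plus its two gr.State inputs here.
    btn.click(fn=lambda q: f"You asked: {q}", inputs=question, outputs=out)

# demo.launch()  # uncomment to run locally
```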