prithvi1029 committed on
Commit
2502eeb
·
verified ·
1 Parent(s): 889af78

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +22 -34
app.py CHANGED
@@ -12,18 +12,14 @@ from huggingface_hub import InferenceClient
12
  # -----------------------------
13
  # Config
14
  # -----------------------------
15
- # IMPORTANT: strip() removes accidental newline in token (common issue in Secrets)
16
  HF_TOKEN = (
17
  os.getenv("HUGGINGFACEHUB_API_TOKEN")
18
- or os.getenv("HUGGINGFACEHUB_API_TOKEN".replace("-", "_")) # just in case
19
  or os.getenv("HF_TOKEN")
20
  or ""
21
  ).strip()
22
 
23
- # Pick a model that is available to you on HF Inference
24
- # If mistralai/Mistral-7B-Instruct-v0.3 fails, set this in Space Variables:
25
- # HF_LLM_MODEL = "HuggingFaceH4/zephyr-7b-beta" (example)
26
- HF_LLM_MODEL = os.getenv("HF_LLM_MODEL", "mistralai/Mistral-7B-Instruct-v0.3").strip()
27
 
28
  EMBED_MODEL_NAME = os.getenv("EMBED_MODEL", "sentence-transformers/all-MiniLM-L6-v2").strip()
29
  TOP_K = int(os.getenv("TOP_K", "4"))
@@ -81,14 +77,22 @@ def retrieve(query, embedder, index, chunks, k=TOP_K):
81
 
82
  def hf_generate_text(prompt: str) -> str:
83
  """
84
- Force HF Inference (NOT Together).
85
- Use text_generation endpoint (NOT chat_completion) to avoid "conversational" task errors.
86
  """
87
- client = InferenceClient(provider="hf-inference", token=HF_TOKEN)
 
 
 
 
 
 
 
 
 
88
 
89
  try:
90
  out = client.text_generation(
91
- model=HF_LLM_MODEL,
92
  prompt=prompt,
93
  max_new_tokens=450,
94
  temperature=0.2,
@@ -102,10 +106,11 @@ def hf_generate_text(prompt: str) -> str:
102
  "LLM call failed.\n\n"
103
  f"**Model:** `{HF_LLM_MODEL}`\n"
104
  f"**Error:** `{type(e).__name__}: {e}`\n\n"
105
- "βœ… Fix:\n"
106
- "1) Go to **Space β†’ Settings β†’ Variables and secrets**\n"
107
- "2) Add/Change a **Variable** named `HF_LLM_MODEL` to a model you can access on HF Inference.\n"
108
- "3) Restart Space.\n"
 
109
  )
110
 
111
 
@@ -137,15 +142,6 @@ def answer_question(index, chunks, question):
137
  if not question or not question.strip():
138
  return "Type a question."
139
 
140
- if not HF_TOKEN:
141
- return (
142
- "HF token not found.\n\n"
143
- "Go to **Space β†’ Settings β†’ Variables and secrets β†’ New secret**\n"
144
- "Name: `HUGGINGFACEHUB_API_TOKEN`\n"
145
- "Value: your hf_... token (no extra spaces/newlines)\n"
146
- "Then **Restart Space**."
147
- )
148
-
149
  hits = retrieve(question, embedder, index, chunks, k=TOP_K)
150
  context = "\n\n".join([f"[{i+1}] {h[1]}" for i, h in enumerate(hits)])
151
 
@@ -175,7 +171,7 @@ with gr.Blocks(title="Agentic Document Intelligence (HF RAG)") as demo:
175
  gr.Markdown(
176
  "# πŸ“„ Agentic Document Intelligence\n"
177
  "Upload a PDF and ask questions (RAG).\n\n"
178
- "**Important:** This app forces `hf-inference` (so it does NOT use Together)."
179
  )
180
 
181
  pdf = gr.File(label="Upload PDF", type="filepath")
@@ -184,20 +180,12 @@ with gr.Blocks(title="Agentic Document Intelligence (HF RAG)") as demo:
184
  index_state = gr.State(None)
185
  chunks_state = gr.State(None)
186
 
187
- pdf.change(
188
- fn=on_upload,
189
- inputs=[pdf],
190
- outputs=[index_state, chunks_state, status],
191
- )
192
 
193
  question = gr.Textbox(label="Ask a question", placeholder="e.g., Give a summary of the PDF")
194
  out = gr.Markdown()
195
  btn = gr.Button("Run")
196
 
197
- btn.click(
198
- fn=answer_question,
199
- inputs=[index_state, chunks_state, question],
200
- outputs=[out],
201
- )
202
 
203
  demo.launch()
 
12
  # -----------------------------
13
  # Config
14
  # -----------------------------
 
15
  HF_TOKEN = (
16
  os.getenv("HUGGINGFACEHUB_API_TOKEN")
17
+ or os.getenv("HUGGINGFACEHUB_API_TOKEN".replace("-", "_"))
18
  or os.getenv("HF_TOKEN")
19
  or ""
20
  ).strip()
21
 
22
+ HF_LLM_MODEL = os.getenv("HF_LLM_MODEL", "HuggingFaceH4/zephyr-7b-beta").strip()
 
 
 
23
 
24
  EMBED_MODEL_NAME = os.getenv("EMBED_MODEL", "sentence-transformers/all-MiniLM-L6-v2").strip()
25
  TOP_K = int(os.getenv("TOP_K", "4"))
 
77
 
78
  def hf_generate_text(prompt: str) -> str:
79
  """
80
+ Uses NORMAL HF serverless inference (no Inference Providers router).
81
+ This avoids router 404 / supported-tasks errors you were getting.
82
  """
83
+ if not HF_TOKEN:
84
+ return (
85
+ "HF token not found.\n\n"
86
+ "Go to **Space β†’ Settings β†’ Variables and secrets β†’ New secret**\n"
87
+ "Name: `HUGGINGFACEHUB_API_TOKEN`\n"
88
+ "Value: your hf_... token\n"
89
+ "Then restart the Space."
90
+ )
91
+
92
+ client = InferenceClient(model=HF_LLM_MODEL, token=HF_TOKEN)
93
 
94
  try:
95
  out = client.text_generation(
 
96
  prompt=prompt,
97
  max_new_tokens=450,
98
  temperature=0.2,
 
106
  "LLM call failed.\n\n"
107
  f"**Model:** `{HF_LLM_MODEL}`\n"
108
  f"**Error:** `{type(e).__name__}: {e}`\n\n"
109
+ "βœ… Fix checklist:\n"
110
+ "1) Confirm `HF_LLM_MODEL` is exactly correct (copy-paste repo id).\n"
111
+ "2) If model is gated, open the model page and click **Agree / Request access**.\n"
112
+ "3) Recreate token with **Read** (usually enough) and ensure it’s pasted correctly in Space secrets.\n"
113
+ "4) Restart Space.\n"
114
  )
115
 
116
 
 
142
  if not question or not question.strip():
143
  return "Type a question."
144
 
 
 
 
 
 
 
 
 
 
145
  hits = retrieve(question, embedder, index, chunks, k=TOP_K)
146
  context = "\n\n".join([f"[{i+1}] {h[1]}" for i, h in enumerate(hits)])
147
 
 
171
  gr.Markdown(
172
  "# πŸ“„ Agentic Document Intelligence\n"
173
  "Upload a PDF and ask questions (RAG).\n\n"
174
+ f"**Model:** `{HF_LLM_MODEL}`"
175
  )
176
 
177
  pdf = gr.File(label="Upload PDF", type="filepath")
 
180
  index_state = gr.State(None)
181
  chunks_state = gr.State(None)
182
 
183
+ pdf.change(fn=on_upload, inputs=[pdf], outputs=[index_state, chunks_state, status])
 
 
 
 
184
 
185
  question = gr.Textbox(label="Ask a question", placeholder="e.g., Give a summary of the PDF")
186
  out = gr.Markdown()
187
  btn = gr.Button("Run")
188
 
189
+ btn.click(fn=answer_question, inputs=[index_state, chunks_state, question], outputs=[out])
 
 
 
 
190
 
191
  demo.launch()