prithvi1029 committed on
Commit
babb85f
·
verified ·
1 Parent(s): 07b60a3

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +47 -13
app.py CHANGED
@@ -13,9 +13,16 @@ from huggingface_hub import InferenceClient
13
  # Config
14
  # -----------------------------
15
  HF_TOKEN = os.getenv("HUGGINGFACEHUB_API_TOKEN") or os.getenv("HF_TOKEN")
16
- # Pick a model that works with Inference API (you can change this)
 
17
  HF_LLM_MODEL = os.getenv("HF_LLM_MODEL", "mistralai/Mistral-7B-Instruct-v0.3")
18
 
 
 
 
 
 
 
19
  EMBED_MODEL_NAME = os.getenv("EMBED_MODEL", "sentence-transformers/all-MiniLM-L6-v2")
20
  TOP_K = 4
21
 
@@ -27,6 +34,7 @@ def clean_text(s: str) -> str:
27
  s = re.sub(r"\s+", " ", s)
28
  return s.strip()
29
 
 
30
  def chunk_text(text: str, chunk_size=900, overlap=150):
31
  chunks = []
32
  start = 0
@@ -41,6 +49,7 @@ def chunk_text(text: str, chunk_size=900, overlap=150):
41
  break
42
  return [c for c in (clean_text(x) for x in chunks) if len(c) > 30]
43
 
 
44
  def pdf_to_text(pdf_path: str) -> str:
45
  reader = PdfReader(pdf_path)
46
  pages = []
@@ -50,6 +59,7 @@ def pdf_to_text(pdf_path: str) -> str:
50
  pages.append(t)
51
  return "\n".join(pages)
52
 
 
53
  def build_faiss_index(chunks, embedder):
54
  vectors = embedder.encode(chunks, convert_to_numpy=True, normalize_embeddings=True)
55
  dim = vectors.shape[1]
@@ -57,6 +67,7 @@ def build_faiss_index(chunks, embedder):
57
  index.add(vectors.astype(np.float32))
58
  return index, vectors
59
 
 
60
  def retrieve(query, embedder, index, chunks, k=TOP_K):
61
  qv = embedder.encode([query], convert_to_numpy=True, normalize_embeddings=True).astype(np.float32)
62
  scores, ids = index.search(qv, k)
@@ -67,16 +78,24 @@ def retrieve(query, embedder, index, chunks, k=TOP_K):
67
  hits.append((float(score), chunks[int(idx)]))
68
  return hits
69
 
 
70
  def hf_generate(client: InferenceClient, prompt: str) -> str:
71
- # Works with many chat/instruct models using "text_generation"
72
- out = client.text_generation(
73
- prompt,
74
- max_new_tokens=450,
 
 
 
 
 
 
 
 
75
  temperature=0.2,
76
  top_p=0.9,
77
- repetition_penalty=1.08,
78
  )
79
- return out.strip()
80
 
81
 
82
  # -----------------------------
@@ -84,6 +103,7 @@ def hf_generate(client: InferenceClient, prompt: str) -> str:
84
  # -----------------------------
85
  embedder = SentenceTransformer(EMBED_MODEL_NAME)
86
 
 
87
  def on_upload(pdf_path):
88
  if not pdf_path:
89
  return None, None, "Please upload a PDF."
@@ -99,9 +119,11 @@ def on_upload(pdf_path):
99
  index, _ = build_faiss_index(chunks, embedder)
100
  return index, chunks, f"✅ Indexed {len(chunks)} chunks. Now ask a question."
101
 
 
102
  def answer_question(index, chunks, question):
 
103
  if index is None or chunks is None:
104
- return "Upload a PDF first."
105
  if not question or not question.strip():
106
  return "Type a question."
107
 
@@ -114,8 +136,8 @@ def answer_question(index, chunks, question):
114
  hits = retrieve(question, embedder, index, chunks, k=TOP_K)
115
  context = "\n\n".join([f"[{i+1}] {h[1]}" for i, h in enumerate(hits)])
116
 
117
- prompt = f"""You are a helpful assistant. Answer using ONLY the context.
118
- If the answer is not in the context, say "I don't know from the provided document."
119
 
120
  Question: {question}
121
 
@@ -124,10 +146,18 @@ Context:
124
 
125
  Answer:"""
126
 
127
- client = InferenceClient(model=HF_LLM_MODEL, token=HF_TOKEN)
 
 
 
 
 
 
128
  ans = hf_generate(client, prompt)
129
 
130
- sources = "\n\n".join([f"**Source {i+1} (score={hits[i][0]:.3f})**\n{hits[i][1][:600]}..." for i in range(len(hits))])
 
 
131
 
132
  return f"### Answer\n{ans}\n\n---\n### Retrieved Sources\n{sources}"
133
 
@@ -136,7 +166,11 @@ Answer:"""
136
  # UI
137
  # -----------------------------
138
  with gr.Blocks(title="Agentic Document Intelligence (HF RAG)") as demo:
139
- gr.Markdown("# 📄 Agentic Document Intelligence\nUpload a PDF and ask questions (RAG) — using Hugging Face Inference API.")
 
 
 
 
140
 
141
  pdf = gr.File(label="Upload PDF", type="filepath")
142
  status = gr.Markdown()
 
13
  # Config
14
  # -----------------------------
15
  HF_TOKEN = os.getenv("HUGGINGFACEHUB_API_TOKEN") or os.getenv("HF_TOKEN")
16
+
17
+ # LLM (keep same default, but we will call it via chat_completion, not text_generation)
18
  HF_LLM_MODEL = os.getenv("HF_LLM_MODEL", "mistralai/Mistral-7B-Instruct-v0.3")
19
 
20
+ # IMPORTANT:
21
+ # If you are explicitly using Together as a provider, set this variable in Space secrets:
22
+ # HF_PROVIDER="together"
23
+ # If you leave it empty, it will use Hugging Face default provider.
24
+ HF_PROVIDER = os.getenv("HF_PROVIDER", "").strip() or None
25
+
26
  EMBED_MODEL_NAME = os.getenv("EMBED_MODEL", "sentence-transformers/all-MiniLM-L6-v2")
27
  TOP_K = 4
28
 
 
34
  s = re.sub(r"\s+", " ", s)
35
  return s.strip()
36
 
37
+
38
  def chunk_text(text: str, chunk_size=900, overlap=150):
39
  chunks = []
40
  start = 0
 
49
  break
50
  return [c for c in (clean_text(x) for x in chunks) if len(c) > 30]
51
 
52
+
53
  def pdf_to_text(pdf_path: str) -> str:
54
  reader = PdfReader(pdf_path)
55
  pages = []
 
59
  pages.append(t)
60
  return "\n".join(pages)
61
 
62
+
63
  def build_faiss_index(chunks, embedder):
64
  vectors = embedder.encode(chunks, convert_to_numpy=True, normalize_embeddings=True)
65
  dim = vectors.shape[1]
 
67
  index.add(vectors.astype(np.float32))
68
  return index, vectors
69
 
70
+
71
  def retrieve(query, embedder, index, chunks, k=TOP_K):
72
  qv = embedder.encode([query], convert_to_numpy=True, normalize_embeddings=True).astype(np.float32)
73
  scores, ids = index.search(qv, k)
 
78
  hits.append((float(score), chunks[int(idx)]))
79
  return hits
80
 
81
+
82
  def hf_generate(client: InferenceClient, prompt: str) -> str:
83
+ """
84
+ FIX:
85
+ Together provider doesn't support `text_generation` for this model.
86
+ Use chat_completion (conversational) instead.
87
+ """
88
+ resp = client.chat_completion(
89
+ model=HF_LLM_MODEL,
90
+ messages=[
91
+ {"role": "system", "content": "You are a helpful assistant. Answer using ONLY the provided context."},
92
+ {"role": "user", "content": prompt},
93
+ ],
94
+ max_tokens=450,
95
  temperature=0.2,
96
  top_p=0.9,
 
97
  )
98
+ return resp.choices[0].message.content.strip()
99
 
100
 
101
  # -----------------------------
 
103
  # -----------------------------
104
  embedder = SentenceTransformer(EMBED_MODEL_NAME)
105
 
106
+
107
  def on_upload(pdf_path):
108
  if not pdf_path:
109
  return None, None, "Please upload a PDF."
 
119
  index, _ = build_faiss_index(chunks, embedder)
120
  return index, chunks, f"✅ Indexed {len(chunks)} chunks. Now ask a question."
121
 
122
+
123
  def answer_question(index, chunks, question):
124
+ # FIX: gate on index/chunks, NOT on the original pdf file
125
  if index is None or chunks is None:
126
+ return "Upload and index a PDF first."
127
  if not question or not question.strip():
128
  return "Type a question."
129
 
 
136
  hits = retrieve(question, embedder, index, chunks, k=TOP_K)
137
  context = "\n\n".join([f"[{i+1}] {h[1]}" for i, h in enumerate(hits)])
138
 
139
+ prompt = f"""Answer using ONLY the context.
140
+ If the answer is not in the context, say: "I don't know from the provided document."
141
 
142
  Question: {question}
143
 
 
146
 
147
  Answer:"""
148
 
149
+ # If HF_PROVIDER is set to "together", this will route to Together.
150
+ # If not set, it uses Hugging Face default provider.
151
+ if HF_PROVIDER:
152
+ client = InferenceClient(provider=HF_PROVIDER, token=HF_TOKEN)
153
+ else:
154
+ client = InferenceClient(token=HF_TOKEN)
155
+
156
  ans = hf_generate(client, prompt)
157
 
158
+ sources = "\n\n".join(
159
+ [f"**Source {i+1} (score={hits[i][0]:.3f})**\n{hits[i][1][:600]}..." for i in range(len(hits))]
160
+ )
161
 
162
  return f"### Answer\n{ans}\n\n---\n### Retrieved Sources\n{sources}"
163
 
 
166
  # UI
167
  # -----------------------------
168
  with gr.Blocks(title="Agentic Document Intelligence (HF RAG)") as demo:
169
+ gr.Markdown(
170
+ "# 📄 Agentic Document Intelligence\n"
171
+ "Upload a PDF and ask questions (RAG) — using Hugging Face Inference API.\n\n"
172
+ "**Tip:** If you use Together as a provider, set Space secret `HF_PROVIDER=together`."
173
+ )
174
 
175
  pdf = gr.File(label="Upload PDF", type="filepath")
176
  status = gr.Markdown()