manabb committed on
Commit
87a3c7e
·
verified ·
1 Parent(s): c1a952d

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +75 -70
app.py CHANGED
@@ -236,7 +236,34 @@ def update_faiss_from_hf(repo_id, file, embedding_model="sentence-transformers/a
236
  # result = update_faiss_from_hf("yourusername/my-faiss-store", "new_document.pdf")
237
  # print(result)
238
  #====================
239
- def upload_and_prepare(file,user):
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
240
  # Load & split document
241
  mm=""
242
  if user == os.getenv("uploading_password"):
@@ -329,83 +356,61 @@ def generate_qa_chain(repo_id, embedding_model="sentence-transformers/all-MiniLM
329
  #============================
330
  def bePrepare():
331
  global qa_chain
332
- qa_chain = generate_qa_chain(os.getenv("reposit_id"),llm=llm)
333
  return "I am ready, ask me questions with model tiny Lama."
334
 
335
  def bePrepare1():
336
  global qa_chain1
337
- qa_chain1 = generate_qa_chain(os.getenv("reposit_id"),llm=llm1)
338
  return "I am ready, ask me questions with model google flan-t5."
339
 
340
  def ask_question(query):
341
- msg="Blank question! "
342
- if not qa_chain:
343
- msg="❌ Please clik the button to get the udated resources with tiny Lama."
344
- if query:
345
- response = qa_chain.invoke({"query": query})
346
- #msg= response["result"]
347
- result = response["result"]
348
- # Extract source documents with page info
349
- sources = response.get("source_documents", [])
350
- source_info = ""
351
- pdf_url = f"https://huggingface.co/datasets/manabb/withPDFlink/resolve/main/docs/{os.path.basename(filename)}"
352
- source_info += f" [πŸ“„ PDF]({pdf_url})"
353
- for i, doc in enumerate(sources[:3]): # Top 3 sources
354
- page_num = getattr(doc.metadata, 'page', 'Unknown')
355
- filename = getattr(doc.metadata, 'source', 'Unknown PDF')
356
- source_info += f"\n**Source {i+1}:** {os.path.basename(filename)} (Page {page_num})"
357
-
358
- # HF dataset link
359
- repo_url = f"https://huggingface.co/datasets/manabb/withPDFlink"
360
- msg = f"{result}\n\n**πŸ“„ Document Sources:**{source_info}\n\n[View all documents]({repo_url})"
361
- return msg
362
  def ask_question1(query):
363
- msg="Blank question!"
364
- if not qa_chain1:
365
- msg="❌ Please clik the button to get the udated resources google flan-t5."
366
- if query:
367
- response1 = qa_chain1.invoke({"query": query})
368
- #msg=response1["result"]
369
- result = response["result"]
370
- # Extract source documents with page info
371
- sources = response.get("source_documents", [])
372
- source_info = ""
373
- pdf_url = f"https://huggingface.co/datasets/manabb/withPDFlink/resolve/main/docs/{os.path.basename(filename)}"
374
- source_info += f" [πŸ“„ PDF]({pdf_url})"
375
- for i, doc in enumerate(sources[:3]): # Top 3 sources
376
- page_num = getattr(doc.metadata, 'page', 'Unknown')
377
- filename = getattr(doc.metadata, 'source', 'Unknown PDF')
378
- source_info += f"\n**Source {i+1}:** {os.path.basename(filename)} (Page {page_num})"
379
-
380
- # HF dataset link
381
- repo_url = f"https://huggingface.co/datasets/manabb/withPDFlink"
382
- msg = f"{result}\n\n**πŸ“„ Document Sources:**{source_info}\n\n[View all documents]({repo_url})"
383
- return msg
384
  #===============================================
385
  #delete entire repo
386
  def delete_entire_repo(user):
387
  mx="Unauthorized user."
388
- if user == os.getenv("uploading_password"):
389
- try:
390
- api = HfApi(token=os.getenv("HF_TOKEN"))
391
- api.delete_repo(repo_id=os.getenv("reposit_id"), repo_type="dataset")
392
- mx=f"πŸ—‘οΈ Entire repo {repo_id} deleted. Create new one."
393
- except Exception as e:
394
- mx=f"❌ error during deletetion of repo: {e} "
395
- #create new repo
396
- try:
397
- api = HfApi(token=os.getenv("HF_TOKEN"))
398
- # Create repo
399
- repo_id = api.create_repo(
400
- repo_id=os.getenv("reposit_id"),
401
- repo_type="dataset",
402
- private=False, # making public
403
- exist_ok=False # Fail if exists
404
- )
405
- mx=mx+"New repo created.."
406
- except Exception as e1:
407
- mx = mx+f"❌ eoor during creation of depo: {e1}"
408
- return mx
409
  #===============================================
410
  # ❌ Static (never updates)
411
  # pdf_list = gr.Markdown("**No documents loaded yet.**")
@@ -414,7 +419,7 @@ def delete_entire_repo(user):
414
  def get_pdf_list():
415
  repo_id=os.getenv("reposit_id")
416
  try:
417
- from huggingface_hub import HfApi
418
  api = HfApi(token=os.getenv("HF_TOKEN"))
419
  files = api.list_repo_files(repo_id, repo_type="dataset")
420
 
@@ -490,10 +495,10 @@ with gr.Blocks(title="N R L C H A T B O T - for commercial procurement - Supply"
490
  authorized_user=gr.Textbox(label="Write the password to upload new Circular Doc.")
491
  with gr.Row():
492
  upload_btn = gr.Button("πŸ”„ Process Doc")
493
- upload_btn.click(upload_and_prepare, inputs=[file_input,authorized_user], outputs=output_msg)
494
  with gr.Row():
495
- upload_btn = gr.Button("πŸ”„ Delete complete repo")
496
- upload_btn.click(delete_entire_repo, inputs=authorized_user, outputs=output_msg)
497
 
498
 
499
 
 
236
  # result = update_faiss_from_hf("yourusername/my-faiss-store", "new_document.pdf")
237
  # print(result)
238
  #====================
239
def upload_and_prepare(file, user):
    """Index an uploaded document and list the PDFs stored in the dataset repo.

    Args:
        file: The uploaded document, passed through unchanged to
            ``update_faiss_from_hf`` / ``create_faiss_index``.
        user: Password typed by the caller; must equal the
            ``uploading_password`` environment variable.

    Returns:
        A ``(status_message, pdf_links_markdown)`` tuple. The second element
        stays ``"**No PDFs**"`` when the caller is not authorised, when the
        repo holds no PDF, or when an error happens before the listing.
    """
    no_pdfs = "**No PDFs**"

    # An unset password must never authorise anybody: without this guard
    # ``None != None`` is False and a missing env var would let ``None`` in.
    expected = os.getenv("uploading_password")
    if not expected or user != expected:
        return "❌ Unauthorized User", no_pdfs

    from urllib.parse import quote

    mm = ""
    pdf_links = no_pdfs
    try:
        # Extend the existing index when the repo already has one,
        # otherwise build it from scratch.
        if file_exists(repo_id=repo_id, filename="index.faiss", repo_type="dataset"):
            mm = update_faiss_from_hf(repo_id, file)
        else:
            mm = create_faiss_index(repo_id, file)

        api = HfApi(token=os.getenv("HF_TOKEN"))
        links = [
            # Escape the path so names with spaces still give a valid link.
            f"• [📄 {name}](https://huggingface.co/datasets/{repo_id}/resolve/main/{quote(name)})"
            for name in api.list_repo_files(repo_id, repo_type="dataset")
            if name.lower().endswith(".pdf")
        ]
        # Keep the placeholder instead of returning an empty markdown string.
        if links:
            pdf_links = "\n".join(links)
    except Exception as e:
        # UI boundary: report the failure to the user instead of crashing.
        mm = f"{mm or ''}\n❌ Error: {e}"

    return mm, pdf_links
261
+
262
+
263
+ #============
264
+ def upload_and_prepare_old(file,user):
265
+ #==============================
266
+ #=============================
267
  # Load & split document
268
  mm=""
269
  if user == os.getenv("uploading_password"):
 
356
  #============================
357
  def bePrepare():
358
  global qa_chain
359
+ qa_chain = generate_qa_chain(repo_id,llm=llm)
360
  return "I am ready, ask me questions with model tiny Lama."
361
 
362
  def bePrepare1():
363
  global qa_chain1
364
+ qa_chain1 = generate_qa_chain(repo_id,llm=llm1)
365
  return "I am ready, ask me questions with model google flan-t5."
366
 
367
  def ask_question(query):
368
+ if not query or not qa_chain:
369
+ return "❌ Please click prepare button first and check whether question is empty"
370
+
371
+ response = qa_chain.invoke({"query": query})
372
+ result = response["result"]
373
+ sources = response.get("source_documents", [])
374
+
375
+ source_info = ""
376
+ for i, doc in enumerate(sources[:3]):
377
+ page_num = doc.metadata.get('page', 'Unknown')
378
+ filename = os.path.basename(doc.metadata.get('source', 'Unknown'))
379
+ repo_url = f"https://huggingface.co/datasets/{repo_id}/resolve/main/docs/{filename}"
380
+ source_info += f"\n**Source {i+1}:** [{filename} (Page {page_num})]({repo_url})"
381
+
382
+ return f"{result}\n\n**πŸ“„ Sources:**{source_info}"
383
+
 
 
 
 
 
384
  def ask_question1(query):
385
+ if not query or not qa_chain1:
386
+ return "❌ Please click prepare button first and check whether question is empty"
387
+
388
+ response = qa_chain1.invoke({"query": query})
389
+ result = response["result"]
390
+ sources = response.get("source_documents", [])
391
+
392
+ source_info = ""
393
+ for i, doc in enumerate(sources[:3]):
394
+ page_num = doc.metadata.get('page', 'Unknown')
395
+ filename = os.path.basename(doc.metadata.get('source', 'Unknown'))
396
+ repo_url = f"https://huggingface.co/datasets/{repo_id}/resolve/main/docs/{filename}"
397
+ source_info += f"\n**Source {i+1}:** [{filename} (Page {page_num})]({repo_url})"
398
+
399
+ return f"{result}\n\n**πŸ“„ Sources:**{source_info}"
 
 
 
 
 
 
400
  #===============================================
401
  #delete entire repo
402
  def delete_entire_repo(user):
403
  mx="Unauthorized user."
404
+ repo=os.getenv("reposit_id")
405
+ if user != os.getenv("uploading_password"):
406
+ return "❌ Unauthorized user"
407
+ try:
408
+ api = HfApi(token=os.getenv("HF_TOKEN"))
409
+ api.delete_repo(repo_id=repo, repo_type="dataset")
410
+ api.create_repo(repo_id=repo, repo_type="dataset", private=False)
411
+ return f"βœ… Repo {repo_id} reset successfully"
412
+ except Exception as e:
413
+ mx=f"❌ error during deletetion & creation of repo: {e} "
 
 
 
 
 
 
 
 
 
 
 
414
  #===============================================
415
  # ❌ Static (never updates)
416
  # pdf_list = gr.Markdown("**No documents loaded yet.**")
 
419
  def get_pdf_list():
420
  repo_id=os.getenv("reposit_id")
421
  try:
422
+
423
  api = HfApi(token=os.getenv("HF_TOKEN"))
424
  files = api.list_repo_files(repo_id, repo_type="dataset")
425
 
 
495
  authorized_user=gr.Textbox(label="Write the password to upload new Circular Doc.")
496
  with gr.Row():
497
  upload_btn = gr.Button("πŸ”„ Process Doc")
498
+ upload_btn.click(upload_and_prepare, inputs=[file_input,authorized_user], outputs=[output_msg,pdf_list])
499
  with gr.Row():
500
+ delete_btn = gr.Button("πŸ”„ Delete complete repo")
501
+ delete_btn.click(delete_entire_repo, inputs=authorized_user, outputs=output_msg)
502
 
503
 
504