Spaces:
Sleeping
Sleeping
Update app.py
Browse files
app.py
CHANGED
|
@@ -236,7 +236,34 @@ def update_faiss_from_hf(repo_id, file, embedding_model="sentence-transformers/a
|
|
| 236 |
# result = update_faiss_from_hf("yourusername/my-faiss-store", "new_document.pdf")
|
| 237 |
# print(result)
|
| 238 |
#====================
|
| 239 |
-
def upload_and_prepare(file,user):
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 240 |
# Load & split document
|
| 241 |
mm=""
|
| 242 |
if user == os.getenv("uploading_password"):
|
|
@@ -329,83 +356,61 @@ def generate_qa_chain(repo_id, embedding_model="sentence-transformers/all-MiniLM
|
|
| 329 |
#============================
|
| 330 |
def bePrepare():
|
| 331 |
global qa_chain
|
| 332 |
-
qa_chain = generate_qa_chain(
|
| 333 |
return "I am ready, ask me questions with model tiny Lama."
|
| 334 |
|
| 335 |
def bePrepare1():
|
| 336 |
global qa_chain1
|
| 337 |
-
qa_chain1 = generate_qa_chain(
|
| 338 |
return "I am ready, ask me questions with model google flan-t5."
|
| 339 |
|
| 340 |
def ask_question(query):
|
| 341 |
-
|
| 342 |
-
|
| 343 |
-
|
| 344 |
-
|
| 345 |
-
|
| 346 |
-
|
| 347 |
-
|
| 348 |
-
|
| 349 |
-
|
| 350 |
-
|
| 351 |
-
|
| 352 |
-
|
| 353 |
-
|
| 354 |
-
|
| 355 |
-
|
| 356 |
-
|
| 357 |
-
|
| 358 |
-
# HF dataset link
|
| 359 |
-
repo_url = f"https://huggingface.co/datasets/manabb/withPDFlink"
|
| 360 |
-
msg = f"{result}\n\n**π Document Sources:**{source_info}\n\n[View all documents]({repo_url})"
|
| 361 |
-
return msg
|
| 362 |
def ask_question1(query):
|
| 363 |
-
|
| 364 |
-
|
| 365 |
-
|
| 366 |
-
|
| 367 |
-
|
| 368 |
-
|
| 369 |
-
|
| 370 |
-
|
| 371 |
-
|
| 372 |
-
|
| 373 |
-
|
| 374 |
-
|
| 375 |
-
|
| 376 |
-
|
| 377 |
-
|
| 378 |
-
source_info += f"\n**Source {i+1}:** {os.path.basename(filename)} (Page {page_num})"
|
| 379 |
-
|
| 380 |
-
# HF dataset link
|
| 381 |
-
repo_url = f"https://huggingface.co/datasets/manabb/withPDFlink"
|
| 382 |
-
msg = f"{result}\n\n**π Document Sources:**{source_info}\n\n[View all documents]({repo_url})"
|
| 383 |
-
return msg
|
| 384 |
#===============================================
|
| 385 |
#delete entire repo
|
| 386 |
def delete_entire_repo(user):
|
| 387 |
mx="Unauthorized user."
|
| 388 |
-
|
| 389 |
-
|
| 390 |
-
|
| 391 |
-
|
| 392 |
-
|
| 393 |
-
|
| 394 |
-
|
| 395 |
-
|
| 396 |
-
|
| 397 |
-
|
| 398 |
-
# Create repo
|
| 399 |
-
repo_id = api.create_repo(
|
| 400 |
-
repo_id=os.getenv("reposit_id"),
|
| 401 |
-
repo_type="dataset",
|
| 402 |
-
private=False, # making public
|
| 403 |
-
exist_ok=False # Fail if exists
|
| 404 |
-
)
|
| 405 |
-
mx=mx+"New repo created.."
|
| 406 |
-
except Exception as e1:
|
| 407 |
-
mx = mx+f"β eoor during creation of depo: {e1}"
|
| 408 |
-
return mx
|
| 409 |
#===============================================
|
| 410 |
# ❌ Static (never updates)
|
| 411 |
# pdf_list = gr.Markdown("**No documents loaded yet.**")
|
|
@@ -414,7 +419,7 @@ def delete_entire_repo(user):
|
|
| 414 |
def get_pdf_list():
|
| 415 |
repo_id=os.getenv("reposit_id")
|
| 416 |
try:
|
| 417 |
-
|
| 418 |
api = HfApi(token=os.getenv("HF_TOKEN"))
|
| 419 |
files = api.list_repo_files(repo_id, repo_type="dataset")
|
| 420 |
|
|
@@ -490,10 +495,10 @@ with gr.Blocks(title="N R L C H A T B O T - for commercial procurement - Supply"
|
|
| 490 |
authorized_user=gr.Textbox(label="Write the password to upload new Circular Doc.")
|
| 491 |
with gr.Row():
|
| 492 |
upload_btn = gr.Button("π Process Doc")
|
| 493 |
-
upload_btn.click(upload_and_prepare, inputs=[file_input,authorized_user], outputs=output_msg)
|
| 494 |
with gr.Row():
|
| 495 |
-
|
| 496 |
-
|
| 497 |
|
| 498 |
|
| 499 |
|
|
|
|
| 236 |
# result = update_faiss_from_hf("yourusername/my-faiss-store", "new_document.pdf")
|
| 237 |
# print(result)
|
| 238 |
#====================
|
| 239 |
+
def upload_and_prepare(file, user):
    """Index an uploaded document and list the PDFs stored in the dataset repo.

    Args:
        file: The uploaded document, passed through unchanged to
            ``update_faiss_from_hf`` / ``create_faiss_index``.
        user: The password typed by the caller; compared against the
            ``uploading_password`` environment variable.

    Returns:
        A ``(status_message, pdf_links_markdown)`` tuple.  The second element
        stays ``"**No PDFs**"`` when the caller is not authorised, when the
        listing fails, or when the repo contains no ``.pdf`` files.
    """
    import hmac
    from urllib.parse import quote

    status = ""
    pdf_links = "**No PDFs**"

    # Reject when no password is configured (otherwise a missing secret would
    # match a missing input) and compare in constant time.
    expected = os.getenv("uploading_password")
    supplied = "" if user is None else str(user)
    if not expected or not hmac.compare_digest(
        supplied.encode("utf-8"), expected.encode("utf-8")
    ):
        return "❌ Unauthorized User", pdf_links

    try:
        # Extend the existing FAISS index if the repo already has one,
        # otherwise build it from scratch.
        if file_exists(repo_id=repo_id, filename="index.faiss", repo_type="dataset"):
            status = update_faiss_from_hf(repo_id, file)
        else:
            status = create_faiss_index(repo_id, file)

        # Refresh the PDF list only after indexing succeeded.
        api = HfApi(token=os.getenv("HF_TOKEN"))
        repo_files = api.list_repo_files(repo_id, repo_type="dataset")
        links = [
            # quote() keeps names with spaces/parentheses from breaking the
            # markdown link target.
            f"• [📄 {name}](https://huggingface.co/datasets/{repo_id}/resolve/main/{quote(name)})"
            for name in repo_files
            if name.endswith(".pdf")
        ]
        if links:
            pdf_links = "\n".join(links)
    except Exception as e:
        # UI boundary: surface the failure in the status box instead of
        # crashing the Gradio callback.  ``status`` may still be None if the
        # indexer returned nothing, so normalise before appending.
        status = f"{status or ''}\n❌ Error: {e}"

    return status, pdf_links
|
| 261 |
+
|
| 262 |
+
|
| 263 |
+
#============
|
| 264 |
+
def upload_and_prepare_old(file,user):
|
| 265 |
+
#==============================
|
| 266 |
+
#=============================
|
| 267 |
# Load & split document
|
| 268 |
mm=""
|
| 269 |
if user == os.getenv("uploading_password"):
|
|
|
|
| 356 |
#============================
|
| 357 |
def bePrepare():
    """Build the QA chain for ``llm`` and publish it as the global ``qa_chain``.

    Returns:
        The fixed readiness message shown to the user.
    """
    global qa_chain
    ready_message = "I am ready, ask me questions with model tiny Lama."
    qa_chain = generate_qa_chain(repo_id, llm=llm)
    return ready_message
|
| 361 |
|
| 362 |
def bePrepare1():
    """Build the QA chain for ``llm1`` and publish it as the global ``qa_chain1``.

    Returns:
        The fixed readiness message shown to the user.
    """
    global qa_chain1
    ready_message = "I am ready, ask me questions with model google flan-t5."
    qa_chain1 = generate_qa_chain(repo_id, llm=llm1)
    return ready_message
|
| 366 |
|
| 367 |
def ask_question(query):
    """Answer ``query`` with the global ``qa_chain`` and append source links.

    Args:
        query: The user's question.

    Returns:
        A markdown string: the chain's ``result`` followed by links to at
        most three source documents, or an error message when the question is
        empty or ``qa_chain`` has not been set yet.
    """
    from urllib.parse import quote

    # Look the chain up defensively: before bePrepare() has run the global
    # may not exist at all, which would otherwise raise NameError.
    chain = globals().get("qa_chain")
    if not query or not chain:
        return "❌ Please click prepare button first and check whether question is empty"

    response = chain.invoke({"query": query})
    answer = response["result"]
    sources = response.get("source_documents") or []

    source_lines = []
    for number, doc in enumerate(sources[:3], start=1):
        page_num = doc.metadata.get("page", "Unknown")
        filename = os.path.basename(doc.metadata.get("source", "Unknown"))
        # NOTE(review): upload_and_prepare links PDFs at the repo root while
        # this link points under docs/ — confirm the dataset layout.
        # quote() keeps names with spaces from breaking the markdown link.
        repo_url = (
            f"https://huggingface.co/datasets/{repo_id}/resolve/main/docs/{quote(filename)}"
        )
        source_lines.append(
            f"\n**Source {number}:** [{filename} (Page {page_num})]({repo_url})"
        )

    source_info = "".join(source_lines)
    return f"{answer}\n\n**📚 Sources:**{source_info}"
|
| 383 |
+
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 384 |
def ask_question1(query):
    """Answer ``query`` with the global ``qa_chain1`` and append source links.

    Args:
        query: The user's question.

    Returns:
        A markdown string: the chain's ``result`` followed by links to at
        most three source documents, or an error message when the question is
        empty or ``qa_chain1`` has not been set yet.
    """
    from urllib.parse import quote

    # Look the chain up defensively: before bePrepare1() has run the global
    # may not exist at all, which would otherwise raise NameError.
    chain = globals().get("qa_chain1")
    if not query or not chain:
        return "❌ Please click prepare button first and check whether question is empty"

    response = chain.invoke({"query": query})
    answer = response["result"]
    sources = response.get("source_documents") or []

    source_lines = []
    for number, doc in enumerate(sources[:3], start=1):
        page_num = doc.metadata.get("page", "Unknown")
        filename = os.path.basename(doc.metadata.get("source", "Unknown"))
        # NOTE(review): upload_and_prepare links PDFs at the repo root while
        # this link points under docs/ — confirm the dataset layout.
        # quote() keeps names with spaces from breaking the markdown link.
        repo_url = (
            f"https://huggingface.co/datasets/{repo_id}/resolve/main/docs/{quote(filename)}"
        )
        source_lines.append(
            f"\n**Source {number}:** [{filename} (Page {page_num})]({repo_url})"
        )

    source_info = "".join(source_lines)
    return f"{answer}\n\n**📚 Sources:**{source_info}"
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 400 |
#===============================================
|
| 401 |
#delete entire repo
|
| 402 |
def delete_entire_repo(user):
    """Delete the dataset repo named by ``reposit_id`` and recreate it empty.

    Args:
        user: The password typed by the caller; compared against the
            ``uploading_password`` environment variable.

    Returns:
        A status message: unauthorised, success, or the error raised while
        deleting/recreating the repo.
    """
    import hmac

    repo = os.getenv("reposit_id")

    # Reject when no password is configured (otherwise a missing secret would
    # match a missing input) and compare in constant time.
    expected = os.getenv("uploading_password")
    supplied = "" if user is None else str(user)
    if not expected or not hmac.compare_digest(
        supplied.encode("utf-8"), expected.encode("utf-8")
    ):
        return "❌ Unauthorized user"

    try:
        api = HfApi(token=os.getenv("HF_TOKEN"))
        api.delete_repo(repo_id=repo, repo_type="dataset")
        api.create_repo(repo_id=repo, repo_type="dataset", private=False)
    except Exception as e:
        # UI boundary: report the failure to the caller; previously this
        # branch built the message but never returned it.
        return f"❌ error during deletetion & creation of repo: {e} "

    # Report the repo that was actually reset (the local ``repo``).
    return f"✅ Repo {repo} reset successfully"
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 414 |
#===============================================
|
| 415 |
# ❌ Static (never updates)
|
| 416 |
# pdf_list = gr.Markdown("**No documents loaded yet.**")
|
|
|
|
| 419 |
def get_pdf_list():
|
| 420 |
repo_id=os.getenv("reposit_id")
|
| 421 |
try:
|
| 422 |
+
|
| 423 |
api = HfApi(token=os.getenv("HF_TOKEN"))
|
| 424 |
files = api.list_repo_files(repo_id, repo_type="dataset")
|
| 425 |
|
|
|
|
| 495 |
authorized_user=gr.Textbox(label="Write the password to upload new Circular Doc.")
|
| 496 |
with gr.Row():
|
| 497 |
upload_btn = gr.Button("π Process Doc")
|
| 498 |
+
upload_btn.click(upload_and_prepare, inputs=[file_input,authorized_user], outputs=[output_msg,pdf_list])
|
| 499 |
with gr.Row():
|
| 500 |
+
delete_btn = gr.Button("π Delete complete repo")
|
| 501 |
+
delete_btn.click(delete_entire_repo, inputs=authorized_user, outputs=output_msg)
|
| 502 |
|
| 503 |
|
| 504 |
|