Spaces:
Running
Running
Update app.py
Browse files
app.py
CHANGED
|
@@ -329,12 +329,12 @@ def generate_qa_chain(repo_id, embedding_model="sentence-transformers/all-MiniLM
|
|
| 329 |
#============================
|
| 330 |
def bePrepare():
|
| 331 |
global qa_chain
|
| 332 |
-
qa_chain = generate_qa_chain("
|
| 333 |
return "I am ready, ask me questions with model tiny Lama."
|
| 334 |
|
| 335 |
def bePrepare1():
|
| 336 |
global qa_chain1
|
| 337 |
-
qa_chain1 = generate_qa_chain("
|
| 338 |
return "I am ready, ask me questions with model google flan-t5."
|
| 339 |
|
| 340 |
def ask_question(query):
|
|
@@ -348,7 +348,7 @@ def ask_question(query):
|
|
| 348 |
# Extract source documents with page info
|
| 349 |
sources = response.get("source_documents", [])
|
| 350 |
source_info = ""
|
| 351 |
-
pdf_url = f"https://huggingface.co/datasets/manabb/
|
| 352 |
source_info += f" [π PDF]({pdf_url})"
|
| 353 |
for i, doc in enumerate(sources[:3]): # Top 3 sources
|
| 354 |
page_num = getattr(doc.metadata, 'page', 'Unknown')
|
|
@@ -356,7 +356,7 @@ def ask_question(query):
|
|
| 356 |
source_info += f"\n**Source {i+1}:** {os.path.basename(filename)} (Page {page_num})"
|
| 357 |
|
| 358 |
# HF dataset link
|
| 359 |
-
repo_url = f"https://huggingface.co/datasets/manabb/
|
| 360 |
msg = f"{result}\n\n**π Document Sources:**{source_info}\n\n[View all documents]({repo_url})"
|
| 361 |
return msg
|
| 362 |
def ask_question1(query):
|
|
@@ -370,7 +370,7 @@ def ask_question1(query):
|
|
| 370 |
# Extract source documents with page info
|
| 371 |
sources = response.get("source_documents", [])
|
| 372 |
source_info = ""
|
| 373 |
-
pdf_url = f"https://huggingface.co/datasets/manabb/
|
| 374 |
source_info += f" [π PDF]({pdf_url})"
|
| 375 |
for i, doc in enumerate(sources[:3]): # Top 3 sources
|
| 376 |
page_num = getattr(doc.metadata, 'page', 'Unknown')
|
|
@@ -378,7 +378,7 @@ def ask_question1(query):
|
|
| 378 |
source_info += f"\n**Source {i+1}:** {os.path.basename(filename)} (Page {page_num})"
|
| 379 |
|
| 380 |
# HF dataset link
|
| 381 |
-
repo_url = f"https://huggingface.co/datasets/manabb/
|
| 382 |
msg = f"{result}\n\n**π Document Sources:**{source_info}\n\n[View all documents]({repo_url})"
|
| 383 |
return msg
|
| 384 |
#===============================================
|
|
@@ -407,6 +407,30 @@ def delete_entire_repo(user):
|
|
| 407 |
mx = mx+f"β eoor during creation of depo: {e1}"
|
| 408 |
return mx
|
| 409 |
#===============================================
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 410 |
# Gradio UI
|
| 411 |
with gr.Blocks(title="N R L C H A T B O T - for commercial procurement - Supply", css="""
|
| 412 |
#blue-col { background: linear-gradient(135deg, #667eea, #764ba2); padding: 20px; border-radius: 10px; }
|
|
@@ -452,6 +476,8 @@ with gr.Blocks(title="N R L C H A T B O T - for commercial procurement - Supply"
|
|
| 452 |
gr.Markdown("## π Uploaded Documents")
|
| 453 |
with gr.Row():
|
| 454 |
pdf_list = gr.Markdown("**No documents loaded yet.**")
|
|
|
|
|
|
|
| 455 |
with gr.Column(elem_id="blue-col",scale=1):
|
| 456 |
gr.Markdown("## π§ For uploading new PDF documents.")
|
| 457 |
with gr.Row():
|
|
|
|
| 329 |
#============================
|
| 330 |
def bePrepare():
    """Create the QA chain backed by the module-level ``llm``.

    The dataset repository id is read from the ``reposit_id`` environment
    variable, and the resulting chain is stored in the global ``qa_chain``.

    Returns:
        The fixed readiness message for this model.
    """
    global qa_chain
    repo_id = os.getenv("reposit_id")
    qa_chain = generate_qa_chain(repo_id, llm=llm)
    return "I am ready, ask me questions with model tiny Lama."
|
| 334 |
|
| 335 |
def bePrepare1():
    """Create the QA chain backed by the module-level ``llm1``.

    The dataset repository id is read from the ``reposit_id`` environment
    variable, and the resulting chain is stored in the global ``qa_chain1``.

    Returns:
        The fixed readiness message for this model.
    """
    global qa_chain1
    repo_id = os.getenv("reposit_id")
    qa_chain1 = generate_qa_chain(repo_id, llm=llm1)
    return "I am ready, ask me questions with model google flan-t5."
|
| 339 |
|
| 340 |
def ask_question(query):
|
|
|
|
| 348 |
# Extract source documents with page info
|
| 349 |
sources = response.get("source_documents", [])
|
| 350 |
source_info = ""
|
| 351 |
+
pdf_url = f"https://huggingface.co/datasets/manabb/withPDFlink/resolve/main/docs/{os.path.basename(filename)}"
|
| 352 |
source_info += f" [π PDF]({pdf_url})"
|
| 353 |
for i, doc in enumerate(sources[:3]): # Top 3 sources
|
| 354 |
page_num = getattr(doc.metadata, 'page', 'Unknown')
|
|
|
|
| 356 |
source_info += f"\n**Source {i+1}:** {os.path.basename(filename)} (Page {page_num})"
|
| 357 |
|
| 358 |
# HF dataset link
|
| 359 |
+
repo_url = f"https://huggingface.co/datasets/manabb/withPDFlink"
|
| 360 |
msg = f"{result}\n\n**π Document Sources:**{source_info}\n\n[View all documents]({repo_url})"
|
| 361 |
return msg
|
| 362 |
def ask_question1(query):
|
|
|
|
| 370 |
# Extract source documents with page info
|
| 371 |
sources = response.get("source_documents", [])
|
| 372 |
source_info = ""
|
| 373 |
+
pdf_url = f"https://huggingface.co/datasets/manabb/withPDFlink/resolve/main/docs/{os.path.basename(filename)}"
|
| 374 |
source_info += f" [π PDF]({pdf_url})"
|
| 375 |
for i, doc in enumerate(sources[:3]): # Top 3 sources
|
| 376 |
page_num = getattr(doc.metadata, 'page', 'Unknown')
|
|
|
|
| 378 |
source_info += f"\n**Source {i+1}:** {os.path.basename(filename)} (Page {page_num})"
|
| 379 |
|
| 380 |
# HF dataset link
|
| 381 |
+
repo_url = f"https://huggingface.co/datasets/manabb/withPDFlink"
|
| 382 |
msg = f"{result}\n\n**π Document Sources:**{source_info}\n\n[View all documents]({repo_url})"
|
| 383 |
return msg
|
| 384 |
#===============================================
|
|
|
|
| 407 |
mx = mx+f"β eoor during creation of depo: {e1}"
|
| 408 |
return mx
|
| 409 |
#===============================================
|
| 410 |
+
# ❌ Static (never updates)
|
| 411 |
+
# pdf_list = gr.Markdown("**No documents loaded yet.**")
|
| 412 |
+
|
| 413 |
+
# ✅ Dynamic function
|
| 414 |
+
def get_pdf_list(repo_id, *, files=None):
    """Return a Markdown listing of the PDF files in a Hugging Face dataset repo.

    Args:
        repo_id: Dataset repository id, e.g. ``"user/name"``.
        files: Optional pre-fetched iterable of repo file paths. When given,
            the Hub is not queried. When ``None`` the listing is fetched via
            ``HfApi.list_repo_files`` using the ``HF_TOKEN`` environment
            variable as the token.

    Returns:
        A Markdown string: a header with the PDF count followed by one
        download link per PDF, a "no PDFs" notice when the repo holds none,
        or an error notice when the listing cannot be loaded.
    """
    from urllib.parse import quote

    # NOTE(review): the leading glyphs in these messages ("β", "π") look like
    # mis-decoded emoji in this copy of the file — confirm the intended ones.
    load_error = "**β Cannot load PDF list**"

    # An unset/empty repo id can never resolve; skip the network round trip.
    if not repo_id:
        return load_error

    if files is None:
        try:
            from huggingface_hub import HfApi

            api = HfApi(token=os.getenv("HF_TOKEN"))
            files = api.list_repo_files(repo_id, repo_type="dataset")
        except Exception as exc:
            # Best-effort UI helper: report the failure in the logs but keep
            # the page alive. ``Exception`` (not a bare ``except``) lets
            # KeyboardInterrupt / SystemExit propagate.
            print(f"get_pdf_list failed for {repo_id!r}: {type(exc).__name__}: {exc}")
            return load_error

    # Match the extension case-insensitively so "REPORT.PDF" is listed too.
    pdf_files = [path for path in files if path.lower().endswith(".pdf")]
    if not pdf_files:
        return "**No PDF documents in repo yet.**"

    base_url = f"https://huggingface.co/datasets/{repo_id}/resolve/main/"
    # Percent-encode the path (keeping "/") so names containing spaces or
    # other reserved characters still produce a valid Markdown link target.
    links = [
        f"• [π {os.path.basename(path)}]({base_url}{quote(path)})"
        for path in pdf_files
    ]
    return f"**π Uploaded PDFs ({len(pdf_files)}):**\n" + "\n".join(links)
|
| 432 |
+
|
| 433 |
+
#===============================================
|
| 434 |
# Gradio UI
|
| 435 |
with gr.Blocks(title="N R L C H A T B O T - for commercial procurement - Supply", css="""
|
| 436 |
#blue-col { background: linear-gradient(135deg, #667eea, #764ba2); padding: 20px; border-radius: 10px; }
|
|
|
|
| 476 |
gr.Markdown("## π Uploaded Documents")
|
| 477 |
with gr.Row():
|
| 478 |
pdf_list = gr.Markdown("**No documents loaded yet.**")
|
| 479 |
+
refresh_btn = gr.Button("π Refresh")
|
| 480 |
+
# Event-listener `inputs` must be Gradio components, not plain values, so the
# repo id is read from the environment inside the callback instead.
refresh_btn.click(lambda: get_pdf_list(os.getenv("reposit_id")), inputs=None, outputs=pdf_list)
|
| 481 |
with gr.Column(elem_id="blue-col",scale=1):
|
| 482 |
gr.Markdown("## π§ For uploading new PDF documents.")
|
| 483 |
with gr.Row():
|