Update app.py
Browse files
app.py
CHANGED
|
@@ -140,8 +140,16 @@ def simple_process_pdfs(pdf_paths):
|
|
| 140 |
"""Process PDF documents and return document objects"""
|
| 141 |
documents = []
|
| 142 |
|
|
|
|
|
|
|
|
|
|
| 143 |
for pdf_path in pdf_paths:
|
| 144 |
try:
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 145 |
text = ""
|
| 146 |
with open(pdf_path, 'rb') as file:
|
| 147 |
reader = PyPDF2.PdfReader(file)
|
|
@@ -161,6 +169,8 @@ def simple_process_pdfs(pdf_paths):
|
|
| 161 |
print(f"Warning: No text extracted from {pdf_path}")
|
| 162 |
except Exception as e:
|
| 163 |
print(f"Error processing {pdf_path}: {e}")
|
|
|
|
|
|
|
| 164 |
|
| 165 |
print(f"Processed {len(documents)} PDF documents")
|
| 166 |
return documents
|
|
@@ -414,26 +424,21 @@ comprehensive_evaluation_data = [
|
|
| 414 |
# Gradio Interface
|
| 415 |
def initialize_system():
|
| 416 |
"""Initialize the Vision 2030 Assistant system"""
|
| 417 |
-
#
|
| 418 |
-
|
| 419 |
-
# and if vector stores are already created
|
| 420 |
-
|
| 421 |
-
# Define paths
|
| 422 |
-
model_dir = "models"
|
| 423 |
-
vector_store_dir = "vector_stores"
|
| 424 |
-
pdf_dir = "pdf_data"
|
| 425 |
|
| 426 |
-
|
| 427 |
-
os.
|
| 428 |
-
os.makedirs(pdf_dir, exist_ok=True)
|
| 429 |
-
|
| 430 |
-
# Check if we need to download PDFs
|
| 431 |
-
pdf_files = ["vision2030_docs/saudi_vision203.pdf", "vision2030_docs/saudi_vision2030_ar.pdf"]
|
| 432 |
|
| 433 |
-
#
|
| 434 |
-
|
|
|
|
|
|
|
| 435 |
|
| 436 |
# Process PDFs and create vector store
|
|
|
|
|
|
|
|
|
|
| 437 |
if os.path.exists(os.path.join(vector_store_dir, "index.faiss")):
|
| 438 |
print("Loading existing vector store...")
|
| 439 |
embedding_function = HuggingFaceEmbeddings(
|
|
@@ -443,6 +448,8 @@ def initialize_system():
|
|
| 443 |
else:
|
| 444 |
print("Creating new vector store...")
|
| 445 |
documents = simple_process_pdfs(pdf_files)
|
|
|
|
|
|
|
| 446 |
vector_store = create_vector_store(documents)
|
| 447 |
vector_store.save_local(vector_store_dir)
|
| 448 |
|
|
@@ -505,14 +512,19 @@ def run_evaluation_on_sample(assistant, sample_index=0):
|
|
| 505 |
# Evaluate response
|
| 506 |
evaluation_results = evaluate_response(query, response, reference)
|
| 507 |
|
| 508 |
-
# Format for display
|
| 509 |
-
metrics_str = "\n".join([f"{k}: {v}" for k, v in evaluation_results.items()])
|
| 510 |
-
|
| 511 |
return query, response, reference, evaluation_results, sources, category, language
|
| 512 |
|
| 513 |
-
def qualitative_evaluation_interface(assistant):
|
| 514 |
"""Create a Gradio interface for qualitative evaluation"""
|
| 515 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 516 |
sample_options = [f"{i+1}. {item['query'][:50]}..." for i, item in enumerate(comprehensive_evaluation_data)]
|
| 517 |
|
| 518 |
with gr.Blocks(title="Vision 2030 Assistant - Qualitative Evaluation") as interface:
|
|
@@ -596,8 +608,11 @@ def qualitative_evaluation_interface(assistant):
|
|
| 596 |
query, response, reference, metrics, sources, category, language = run_evaluation_on_sample(assistant, index)
|
| 597 |
sources_str = ", ".join(sources)
|
| 598 |
return query, response, reference, metrics, sources_str, category, language
|
| 599 |
-
except:
|
| 600 |
-
|
|
|
|
|
|
|
|
|
|
| 601 |
|
| 602 |
eval_button.click(
|
| 603 |
handle_sample_selection,
|
|
@@ -614,6 +629,7 @@ def qualitative_evaluation_interface(assistant):
|
|
| 614 |
)
|
| 615 |
|
| 616 |
# Custom evaluation event handlers
|
|
|
|
| 617 |
def handle_custom_evaluation(query, reference):
|
| 618 |
if not query:
|
| 619 |
return "Please enter a query", "", {}
|
|
@@ -639,6 +655,7 @@ def qualitative_evaluation_interface(assistant):
|
|
| 639 |
)
|
| 640 |
|
| 641 |
# Conversation mode event handlers
|
|
|
|
| 642 |
def handle_conversation(message, history):
|
| 643 |
if not message:
|
| 644 |
return history, "", ""
|
|
@@ -672,19 +689,72 @@ def qualitative_evaluation_interface(assistant):
|
|
| 672 |
|
| 673 |
# Main function to run in Hugging Face Space
|
| 674 |
def main():
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 675 |
# Initialize the system
|
| 676 |
try:
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 677 |
assistant = initialize_system()
|
|
|
|
|
|
|
| 678 |
interface = qualitative_evaluation_interface(assistant)
|
|
|
|
|
|
|
| 679 |
interface.launch()
|
| 680 |
except Exception as e:
|
| 681 |
-
print(f"Error
|
|
|
|
|
|
|
|
|
|
| 682 |
# Create a simple error interface
|
| 683 |
-
gr.
|
| 684 |
-
|
| 685 |
-
|
| 686 |
-
|
| 687 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 688 |
|
| 689 |
if __name__ == "__main__":
|
| 690 |
main()
|
|
|
|
| 140 |
"""Process PDF documents and return document objects"""
|
| 141 |
documents = []
|
| 142 |
|
| 143 |
+
print(f"Attempting to process PDFs: {pdf_paths}")
|
| 144 |
+
print(f"Current directory contents: {os.listdir('.')}")
|
| 145 |
+
|
| 146 |
for pdf_path in pdf_paths:
|
| 147 |
try:
|
| 148 |
+
if not os.path.exists(pdf_path):
|
| 149 |
+
print(f"Warning: {pdf_path} does not exist")
|
| 150 |
+
continue
|
| 151 |
+
|
| 152 |
+
print(f"Processing {pdf_path}...")
|
| 153 |
text = ""
|
| 154 |
with open(pdf_path, 'rb') as file:
|
| 155 |
reader = PyPDF2.PdfReader(file)
|
|
|
|
| 169 |
print(f"Warning: No text extracted from {pdf_path}")
|
| 170 |
except Exception as e:
|
| 171 |
print(f"Error processing {pdf_path}: {e}")
|
| 172 |
+
import traceback
|
| 173 |
+
traceback.print_exc()
|
| 174 |
|
| 175 |
print(f"Processed {len(documents)} PDF documents")
|
| 176 |
return documents
|
|
|
|
| 424 |
# Gradio Interface
|
| 425 |
def initialize_system():
|
| 426 |
"""Initialize the Vision 2030 Assistant system"""
|
| 427 |
+
# Define paths for PDF files in the root directory
|
| 428 |
+
pdf_files = ["saudi_vision203.pdf", "saudi_vision2030_ar.pdf"]
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 429 |
|
| 430 |
+
# Print available files for debugging
|
| 431 |
+
print("Files in current directory:", os.listdir("."))
|
|
|
|
|
|
|
|
|
|
|
|
|
| 432 |
|
| 433 |
+
# Check if PDFs exist
|
| 434 |
+
for pdf_file in pdf_files:
|
| 435 |
+
if not os.path.exists(pdf_file):
|
| 436 |
+
print(f"Warning: {pdf_file} not found")
|
| 437 |
|
| 438 |
# Process PDFs and create vector store
|
| 439 |
+
vector_store_dir = "vector_stores"
|
| 440 |
+
os.makedirs(vector_store_dir, exist_ok=True)
|
| 441 |
+
|
| 442 |
if os.path.exists(os.path.join(vector_store_dir, "index.faiss")):
|
| 443 |
print("Loading existing vector store...")
|
| 444 |
embedding_function = HuggingFaceEmbeddings(
|
|
|
|
| 448 |
else:
|
| 449 |
print("Creating new vector store...")
|
| 450 |
documents = simple_process_pdfs(pdf_files)
|
| 451 |
+
if not documents:
|
| 452 |
+
raise ValueError("No documents were processed successfully. Cannot continue.")
|
| 453 |
vector_store = create_vector_store(documents)
|
| 454 |
vector_store.save_local(vector_store_dir)
|
| 455 |
|
|
|
|
| 512 |
# Evaluate response
|
| 513 |
evaluation_results = evaluate_response(query, response, reference)
|
| 514 |
|
|
|
|
|
|
|
|
|
|
| 515 |
return query, response, reference, evaluation_results, sources, category, language
|
| 516 |
|
| 517 |
+
def qualitative_evaluation_interface(assistant=None):
|
| 518 |
"""Create a Gradio interface for qualitative evaluation"""
|
| 519 |
|
| 520 |
+
# If assistant is None, create a simplified interface
|
| 521 |
+
if assistant is None:
|
| 522 |
+
with gr.Blocks(title="Vision 2030 Assistant - Initialization Error") as interface:
|
| 523 |
+
gr.Markdown("# Vision 2030 Assistant - Initialization Error")
|
| 524 |
+
gr.Markdown("There was an error initializing the assistant. Please check the logs for details.")
|
| 525 |
+
gr.Textbox(label="Status", value="System initialization failed")
|
| 526 |
+
return interface
|
| 527 |
+
|
| 528 |
sample_options = [f"{i+1}. {item['query'][:50]}..." for i, item in enumerate(comprehensive_evaluation_data)]
|
| 529 |
|
| 530 |
with gr.Blocks(title="Vision 2030 Assistant - Qualitative Evaluation") as interface:
|
|
|
|
| 608 |
query, response, reference, metrics, sources, category, language = run_evaluation_on_sample(assistant, index)
|
| 609 |
sources_str = ", ".join(sources)
|
| 610 |
return query, response, reference, metrics, sources_str, category, language
|
| 611 |
+
except Exception as e:
|
| 612 |
+
print(f"Error in handle_sample_selection: {e}")
|
| 613 |
+
import traceback
|
| 614 |
+
traceback.print_exc()
|
| 615 |
+
return f"Error processing selection: {e}", "", "", {}, "", "", ""
|
| 616 |
|
| 617 |
eval_button.click(
|
| 618 |
handle_sample_selection,
|
|
|
|
| 629 |
)
|
| 630 |
|
| 631 |
# Custom evaluation event handlers
|
| 632 |
+
@spaces.GPU # Use GPU for custom evaluation
|
| 633 |
def handle_custom_evaluation(query, reference):
|
| 634 |
if not query:
|
| 635 |
return "Please enter a query", "", {}
|
|
|
|
| 655 |
)
|
| 656 |
|
| 657 |
# Conversation mode event handlers
|
| 658 |
+
@spaces.GPU # Use GPU for conversation handling
|
| 659 |
def handle_conversation(message, history):
|
| 660 |
if not message:
|
| 661 |
return history, "", ""
|
|
|
|
| 689 |
|
| 690 |
# Main function to run in Hugging Face Space
|
| 691 |
def main():
    """Start the Vision 2030 Assistant app, falling back to a diagnostic UI.

    Prints a short start-up report (working directory and its contents),
    warns about any expected PDF that is missing, then initializes the
    assistant, builds the evaluation interface and launches it.

    Any ``Exception`` raised while initializing, building or launching the
    main interface is logged with a full traceback, after which a minimal
    Gradio interface showing the error and the directory listing is
    launched in its place.
    """
    # Single source of truth for the PDFs this entry point checks; the same
    # list feeds the start-up warnings and the "Check PDF Files" button.
    # NOTE(review): "saudi_vision203.pdf" looks like it may be missing a "0";
    # confirm against the real file name before changing it.
    pdf_files = ["saudi_vision203.pdf", "saudi_vision2030_ar.pdf"]

    # Start with a debugging report
    banner = "=" * 50
    print(banner)
    print("SYSTEM INITIALIZATION")
    print(banner)
    print("Current directory:", os.getcwd())
    print("Files in directory:", os.listdir("."))
    print(banner)

    try:
        # Warn only: initialization is still attempted when a PDF is missing.
        for pdf_file in pdf_files:
            if not os.path.exists(pdf_file):
                print(f"Warning: {pdf_file} not found!")

        print("Starting system initialization...")
        assistant = initialize_system()

        print("Creating interface...")
        interface = qualitative_evaluation_interface(assistant)

        print("Launching interface...")
        interface.launch()
    except Exception as e:
        # Top-level boundary: report everything, then show a fallback UI
        # instead of letting the process die without any interface.
        print(f"Error during initialization: {e}")
        import traceback
        traceback.print_exc()

        debug_interface = _build_error_interface(e, pdf_files)
        debug_interface.launch()


def _pdf_status_report(pdf_files):
    """Return one line per path: "Found (<size> MB)" or "Not found"."""
    lines = []
    for pdf_file in pdf_files:
        if os.path.exists(pdf_file):
            size = os.path.getsize(pdf_file) / (1024 * 1024)  # Size in MB
            lines.append(f"{pdf_file}: Found ({size:.2f} MB)")
        else:
            lines.append(f"{pdf_file}: Not found")
    return "\n".join(lines)


def _build_error_interface(error, pdf_files):
    """Build (but do not launch) the fallback Gradio UI describing *error*."""
    with gr.Blocks(title="Vision 2030 Assistant - Error") as debug_interface:
        gr.Markdown("# Vision 2030 Assistant - Initialization Error")
        gr.Markdown("There was an error initializing the assistant.")

        # Display error details
        gr.Textbox(
            value=f"Error: {str(error)}",
            label="Error Details",
            lines=5
        )

        # Show file system status (snapshot taken when the UI is built)
        files_list = "\n".join(os.listdir("."))
        gr.Textbox(
            value=files_list,
            label="Files in Directory",
            lines=10
        )

        # Button that re-checks the PDFs on demand. Kept as a named nested
        # function (not a lambda) so the callback keeps the name check_pdfs.
        def check_pdfs():
            return _pdf_status_report(pdf_files)

        check_btn = gr.Button("Check PDF Files")
        pdf_status = gr.Textbox(label="PDF Status", lines=3)
        check_btn.click(check_pdfs, inputs=[], outputs=[pdf_status])

    return debug_interface
|
| 758 |
|
| 759 |
if __name__ == "__main__":
|
| 760 |
main()
|