#!/usr/bin/env python3 # --------------------------------------------------------------- # Main Execution Script for Financial QA System # Provides command-line interface for different system components # --------------------------------------------------------------- import sys import logging import argparse from pathlib import Path # Add src to path sys.path.append(str(Path(__file__).parent / "src")) from rag_system import RAGSystem # type: ignore from data_processor import FinancialDataProcessor # type: ignore from evaluation_system import ComprehensiveEvaluator # type: ignore from fine_tune_system import FineTunedSystem, TrainingConfig # type: ignore logging.basicConfig(level=logging.INFO) logger = logging.getLogger(__name__) def run_data_processing(): """Run data processing pipeline""" logger.info("Starting data processing...") processor = FinancialDataProcessor() processed_texts, qa_pairs = processor.process_all_documents() processor.save_processed_data() # Generate chunks chunks = processor.get_text_chunks() logger.info(f"Data processing complete!") logger.info(f"Processed {len(processed_texts)} documents") logger.info(f"Generated {len(qa_pairs)} Q&A pairs") logger.info(f"Created {len(chunks)} text chunks") return processed_texts, qa_pairs, chunks def run_rag_system(chunks): """Run RAG system evaluation""" logger.info("Initializing RAG system...") rag_system = RAGSystem() rag_system.add_documents(chunks) # Test questions test_questions = [ "What was the company's revenue in 2024?", "What are the total assets?", "What type of company is this?", "What is the capital of France?" # Irrelevant question ] logger.info("Testing RAG system...") for question in test_questions: logger.info(f"\nQuestion: {question}") response = rag_system.answer_question(question) logger.info(f"Answer: {response['answer']}") logger.info(f"Confidence: {response['confidence']:.3f}") logger.info(f"Method: {response['method']}") logger.info(f"Response Time: {response['response_time']:.3f}s") return rag_system def run_fine_tuned_system(qa_pairs): """Run fine-tuned system evaluation""" logger.info("Initializing Fine-tuned system...") fine_tune_system = FineTunedSystem() # Fine-tune on the data config = TrainingConfig( learning_rate=5e-5, batch_size=2, num_epochs=2, max_length=512, warmup_steps=50, weight_decay=0.01, gradient_accumulation_steps=2, save_steps=100, eval_steps=100, logging_steps=50 ) logger.info("Starting fine-tuning...") output_dir = fine_tune_system.fine_tune_on_data(qa_pairs, config) logger.info(f"Fine-tuning complete. Model saved to {output_dir}") # Test questions test_questions = [ "What was the company's revenue in 2024?", "What are the total assets?", "What type of company is this?" ] logger.info("Testing Fine-tuned system...") for question in test_questions: logger.info(f"\nQuestion: {question}") response = fine_tune_system.answer_question(question) logger.info(f"Answer: {response['answer']}") logger.info(f"Confidence: {response['confidence']:.3f}") logger.info(f"Response Time: {response['response_time']:.3f}s") return fine_tune_system def run_comprehensive_evaluation(): """Run comprehensive evaluation""" logger.info("Starting comprehensive evaluation...") evaluator = ComprehensiveEvaluator() results = evaluator.run_comprehensive_evaluation() logger.info("Comprehensive evaluation complete!") return results def run_streamlit_interface(): """Run Streamlit interface""" logger.info("Starting Streamlit interface...") import subprocess import os # Change to src directory os.chdir(Path(__file__).parent / "src") # Run streamlit cmd = ["streamlit", "run", "interface.py"] logger.info(f"Running: {' '.join(cmd)}") try: subprocess.run(cmd, check=True) except subprocess.CalledProcessError as e: logger.error(f"Streamlit failed: {e}") except FileNotFoundError: logger.error("Streamlit not found. Please install with: pip install streamlit") def main(): """Main function with command-line interface""" parser = argparse.ArgumentParser( description="Financial QA System: RAG vs Fine-tuning Comparison" ) parser.add_argument( "mode", choices=["data", "rag", "fine-tune", "evaluate", "interface", "all"], help="Mode to run" ) parser.add_argument( "--output-dir", default="output", help="Output directory for results" ) parser.add_argument( "--verbose", "-v", action="store_true", help="Enable verbose logging" ) args = parser.parse_args() if args.verbose: logging.getLogger().setLevel(logging.DEBUG) # Create output directory output_path = Path(args.output_dir) output_path.mkdir(exist_ok=True) try: if args.mode == "data": run_data_processing() elif args.mode == "rag": # First process data, then run RAG _, _, chunks = run_data_processing() run_rag_system(chunks) elif args.mode == "fine-tune": # First process data, then run fine-tuning _, qa_pairs, _ = run_data_processing() run_fine_tuned_system(qa_pairs) elif args.mode == "evaluate": run_comprehensive_evaluation() elif args.mode == "interface": run_streamlit_interface() elif args.mode == "all": # Run complete pipeline logger.info("Running complete pipeline...") # 1. Data processing processed_texts, qa_pairs, chunks = run_data_processing() # 2. RAG system rag_system = run_rag_system(chunks) # 3. Fine-tuned system fine_tuned_system = run_fine_tuned_system(qa_pairs) # 4. Comprehensive evaluation results = run_comprehensive_evaluation() logger.info("Complete pipeline finished successfully!") logger.info(f"Mode '{args.mode}' completed successfully!") except Exception as e: logger.error(f"Error in mode '{args.mode}': {e}") sys.exit(1) if __name__ == "__main__": main()