Financial_QnA / main.py
vibertron's picture
Upload 25 files
9272683 verified
#!/usr/bin/env python3
# ---------------------------------------------------------------
# Main Execution Script for Financial QA System
# Provides command-line interface for different system components
# ---------------------------------------------------------------
import sys
import logging
import argparse
from pathlib import Path
# Add src to path
sys.path.append(str(Path(__file__).parent / "src"))
from rag_system import RAGSystem # type: ignore
from data_processor import FinancialDataProcessor # type: ignore
from evaluation_system import ComprehensiveEvaluator # type: ignore
from fine_tune_system import FineTunedSystem, TrainingConfig # type: ignore
# Configure the root logger at INFO level; main() switches the root logger
# to DEBUG when --verbose is passed.
logging.basicConfig(level=logging.INFO)
# Module-level logger used by every function in this script.
logger = logging.getLogger(__name__)
def run_data_processing():
    """Execute the document-processing pipeline and log summary counts.

    Creates a ``FinancialDataProcessor``, processes all documents, saves the
    processed data and then asks the processor for its text chunks.

    Returns:
        A ``(processed_texts, qa_pairs, chunks)`` tuple holding the values
        returned by the processor.
    """
    logger.info("Starting data processing...")

    pipeline = FinancialDataProcessor()
    documents, question_answers = pipeline.process_all_documents()
    pipeline.save_processed_data()

    # Chunks are requested only after the processed data has been saved.
    text_chunks = pipeline.get_text_chunks()

    logger.info("Data processing complete!")
    logger.info(f"Processed {len(documents)} documents")
    logger.info(f"Generated {len(question_answers)} Q&A pairs")
    logger.info(f"Created {len(text_chunks)} text chunks")

    return documents, question_answers, text_chunks
def run_rag_system(chunks):
    """Build a RAG system from ``chunks`` and log its answers to sample questions.

    Args:
        chunks: Text chunks passed to ``RAGSystem.add_documents``.

    Returns:
        The ``RAGSystem`` instance, after the chunks were added and every
        sample question was asked.
    """
    logger.info("Initializing RAG system...")
    rag = RAGSystem()
    rag.add_documents(chunks)

    # Sample prompts; the last one is deliberately irrelevant to the documents.
    sample_questions = (
        "What was the company's revenue in 2024?",
        "What are the total assets?",
        "What type of company is this?",
        "What is the capital of France?",
    )

    logger.info("Testing RAG system...")
    for prompt in sample_questions:
        logger.info(f"\nQuestion: {prompt}")
        result = rag.answer_question(prompt)
        logger.info(f"Answer: {result['answer']}")
        logger.info(f"Confidence: {result['confidence']:.3f}")
        logger.info(f"Method: {result['method']}")
        logger.info(f"Response Time: {result['response_time']:.3f}s")

    return rag
def run_fine_tuned_system(qa_pairs):
    """Fine-tune a model on ``qa_pairs`` and log its answers to sample questions.

    Args:
        qa_pairs: Question/answer pairs passed to
            ``FineTunedSystem.fine_tune_on_data``.

    Returns:
        The ``FineTunedSystem`` instance after fine-tuning and after every
        sample question was asked.
    """
    logger.info("Initializing Fine-tuned system...")
    tuner = FineTunedSystem()

    # Training hyperparameters, collected in one place and forwarded as
    # keyword arguments to TrainingConfig.
    hyperparameters = {
        "learning_rate": 5e-5,
        "batch_size": 2,
        "num_epochs": 2,
        "max_length": 512,
        "warmup_steps": 50,
        "weight_decay": 0.01,
        "gradient_accumulation_steps": 2,
        "save_steps": 100,
        "eval_steps": 100,
        "logging_steps": 50,
    }
    training_config = TrainingConfig(**hyperparameters)

    logger.info("Starting fine-tuning...")
    model_dir = tuner.fine_tune_on_data(qa_pairs, training_config)
    logger.info(f"Fine-tuning complete. Model saved to {model_dir}")

    sample_questions = (
        "What was the company's revenue in 2024?",
        "What are the total assets?",
        "What type of company is this?",
    )

    logger.info("Testing Fine-tuned system...")
    for prompt in sample_questions:
        logger.info(f"\nQuestion: {prompt}")
        result = tuner.answer_question(prompt)
        logger.info(f"Answer: {result['answer']}")
        logger.info(f"Confidence: {result['confidence']:.3f}")
        logger.info(f"Response Time: {result['response_time']:.3f}s")

    return tuner
def run_comprehensive_evaluation():
    """Run ``ComprehensiveEvaluator`` once and return whatever it produces.

    Returns:
        The value returned by
        ``ComprehensiveEvaluator.run_comprehensive_evaluation``.
    """
    logger.info("Starting comprehensive evaluation...")
    outcome = ComprehensiveEvaluator().run_comprehensive_evaluation()
    logger.info("Comprehensive evaluation complete!")
    return outcome
def run_streamlit_interface():
    """Launch the Streamlit UI (``src/interface.py``) and wait for it to exit.

    The child process is started with the ``src`` directory (next to this
    script) as its working directory by passing ``cwd`` to
    ``subprocess.run``; the working directory of the current process is not
    modified.

    A non-zero Streamlit exit status and a missing ``streamlit`` executable
    are logged as errors rather than raised.

    Raises:
        FileNotFoundError: If the ``src`` directory does not exist.
    """
    import subprocess

    logger.info("Starting Streamlit interface...")

    src_dir = Path(__file__).parent / "src"
    # Checked up front so that a missing directory is not mistaken for a
    # missing ``streamlit`` executable by the handler below.
    if not src_dir.is_dir():
        raise FileNotFoundError(f"Source directory not found: {src_dir}")

    cmd = ["streamlit", "run", "interface.py"]
    logger.info(f"Running: {' '.join(cmd)}")

    try:
        # cwd= scopes the directory change to the child process only.
        subprocess.run(cmd, check=True, cwd=src_dir)
    except subprocess.CalledProcessError as e:
        logger.error(f"Streamlit failed: {e}")
    except FileNotFoundError:
        logger.error("Streamlit not found. Please install with: pip install streamlit")
def main(argv=None):
    """Parse command-line arguments and run the selected mode.

    Modes: ``data``, ``rag``, ``fine-tune``, ``evaluate``, ``interface`` and
    ``all`` (data processing, RAG, fine-tuning, then evaluation).

    Args:
        argv: Optional list of argument strings. When ``None`` (the default)
            argparse reads the arguments from ``sys.argv[1:]``.

    Raises:
        SystemExit: With status 1 when the selected mode raises an exception;
            argparse also exits on ``--help`` or invalid arguments.
    """
    parser = argparse.ArgumentParser(
        description="Financial QA System: RAG vs Fine-tuning Comparison"
    )
    parser.add_argument(
        "mode",
        choices=["data", "rag", "fine-tune", "evaluate", "interface", "all"],
        help="Mode to run",
    )
    parser.add_argument(
        "--output-dir",
        default="output",
        help="Output directory for results",
    )
    parser.add_argument(
        "--verbose", "-v",
        action="store_true",
        help="Enable verbose logging",
    )
    args = parser.parse_args(argv)

    if args.verbose:
        logging.getLogger().setLevel(logging.DEBUG)

    # parents=True lets nested values such as "results/run1" be created.
    # NOTE(review): nothing in this function passes the directory on to the
    # pipeline steps -- confirm whether they are expected to use it.
    Path(args.output_dir).mkdir(parents=True, exist_ok=True)

    try:
        if args.mode == "data":
            run_data_processing()
        elif args.mode == "rag":
            # RAG needs the text chunks, so process the data first.
            _, _, chunks = run_data_processing()
            run_rag_system(chunks)
        elif args.mode == "fine-tune":
            # Fine-tuning needs the Q&A pairs, so process the data first.
            _, qa_pairs, _ = run_data_processing()
            run_fine_tuned_system(qa_pairs)
        elif args.mode == "evaluate":
            run_comprehensive_evaluation()
        elif args.mode == "interface":
            run_streamlit_interface()
        elif args.mode == "all":
            logger.info("Running complete pipeline...")
            # 1. Data processing
            _, qa_pairs, chunks = run_data_processing()
            # 2. RAG system
            run_rag_system(chunks)
            # 3. Fine-tuned system
            run_fine_tuned_system(qa_pairs)
            # 4. Comprehensive evaluation
            run_comprehensive_evaluation()
            logger.info("Complete pipeline finished successfully!")
        logger.info(f"Mode '{args.mode}' completed successfully!")
    except Exception as e:
        # logger.exception logs at ERROR level and appends the traceback.
        logger.exception(f"Error in mode '{args.mode}': {e}")
        sys.exit(1)


if __name__ == "__main__":
    main()