Spaces:

vibertron
/

Financial_QnA

Sleeping

App Files Files Community

Financial_QnA / main.py

vibertron

Upload 25 files

9272683 verified 5 months ago

raw

history blame contribute delete

6.78 kB

	#!/usr/bin/env python3
	# ---------------------------------------------------------------
	# Main Execution Script for Financial QA System
	# Provides command-line interface for different system components
	# ---------------------------------------------------------------

	import sys
	import logging
	import argparse
	from pathlib import Path


	# Make the sibling "src" directory importable so the project modules imported
	# below can be resolved (presumably they live there -- confirm). The entry is
	# appended, so it is searched after every path already on sys.path.
	sys.path.append(str(Path(__file__).parent / "src"))

	from rag_system import RAGSystem # type: ignore
	from data_processor import FinancialDataProcessor # type: ignore
	from evaluation_system import ComprehensiveEvaluator # type: ignore
	from fine_tune_system import FineTunedSystem, TrainingConfig # type: ignore

	# Configure the root logger at import time: INFO and above are emitted by
	# default (main() lowers the root level to DEBUG when --verbose is given).
	logging.basicConfig(level=logging.INFO)
	# Module-level logger shared by the functions in this script.
	logger = logging.getLogger(__name__)

	def run_data_processing():
	    """Run the document-processing pipeline and log a short summary.

	    Creates a ``FinancialDataProcessor``, processes all documents, saves the
	    processed data, and then requests the processor's text chunks.

	    Returns:
	        A ``(processed_texts, qa_pairs, chunks)`` tuple: the two values
	        returned by ``process_all_documents()`` followed by the result of
	        ``get_text_chunks()``.
	    """
	    logger.info("Starting data processing...")

	    pipeline = FinancialDataProcessor()
	    documents, question_answer_pairs = pipeline.process_all_documents()
	    pipeline.save_processed_data()

	    # Chunks are requested only after the processed data has been saved.
	    text_chunks = pipeline.get_text_chunks()

	    # Summary of what this run produced, one log record per figure.
	    logger.info("Data processing complete!")
	    logger.info(f"Processed {len(documents)} documents")
	    logger.info(f"Generated {len(question_answer_pairs)} Q&A pairs")
	    logger.info(f"Created {len(text_chunks)} text chunks")

	    return documents, question_answer_pairs, text_chunks

	def run_rag_system(chunks):
	    """Load ``chunks`` into a new RAG system and log answers to sample questions.

	    Args:
	        chunks: Text chunks passed unchanged to ``RAGSystem.add_documents``.

	    Returns:
	        The ``RAGSystem`` instance, after the chunks were added and every
	        sample question was asked.

	    Note:
	        Each response is indexed with the keys ``'answer'``, ``'confidence'``,
	        ``'method'`` and ``'response_time'``; the confidence and response time
	        are formatted with three decimals.
	    """
	    logger.info("Initializing RAG system...")

	    rag = RAGSystem()
	    rag.add_documents(chunks)

	    # Fixed smoke-test prompts; the last one is deliberately off-topic.
	    sample_questions = (
	        "What was the company's revenue in 2024?",
	        "What are the total assets?",
	        "What type of company is this?",
	        "What is the capital of France?",  # Irrelevant question
	    )

	    logger.info("Testing RAG system...")
	    for query in sample_questions:
	        logger.info(f"\nQuestion: {query}")
	        result = rag.answer_question(query)
	        logger.info(f"Answer: {result['answer']}")
	        logger.info(f"Confidence: {result['confidence']:.3f}")
	        logger.info(f"Method: {result['method']}")
	        logger.info(f"Response Time: {result['response_time']:.3f}s")

	    return rag

	def run_fine_tuned_system(qa_pairs):
	    """Fine-tune a model on ``qa_pairs`` and log answers to sample questions.

	    Args:
	        qa_pairs: Question/answer data passed unchanged to
	            ``FineTunedSystem.fine_tune_on_data``.

	    Returns:
	        The ``FineTunedSystem`` instance, after fine-tuning finished and every
	        sample question was asked.

	    Note:
	        Each response is indexed with the keys ``'answer'``, ``'confidence'``
	        and ``'response_time'``; the two numeric fields are formatted with
	        three decimals.
	    """
	    logger.info("Initializing Fine-tuned system...")

	    tuned_model = FineTunedSystem()

	    # Hyperparameters for this run, forwarded as keyword arguments.
	    hyperparameters = {
	        "learning_rate": 5e-5,
	        "batch_size": 2,
	        "num_epochs": 2,
	        "max_length": 512,
	        "warmup_steps": 50,
	        "weight_decay": 0.01,
	        "gradient_accumulation_steps": 2,
	        "save_steps": 100,
	        "eval_steps": 100,
	        "logging_steps": 50,
	    }
	    training_config = TrainingConfig(**hyperparameters)

	    logger.info("Starting fine-tuning...")
	    model_dir = tuned_model.fine_tune_on_data(qa_pairs, training_config)
	    logger.info(f"Fine-tuning complete. Model saved to {model_dir}")

	    # Fixed smoke-test prompts asked of the freshly tuned model.
	    sample_questions = (
	        "What was the company's revenue in 2024?",
	        "What are the total assets?",
	        "What type of company is this?",
	    )

	    logger.info("Testing Fine-tuned system...")
	    for query in sample_questions:
	        logger.info(f"\nQuestion: {query}")
	        result = tuned_model.answer_question(query)
	        logger.info(f"Answer: {result['answer']}")
	        logger.info(f"Confidence: {result['confidence']:.3f}")
	        logger.info(f"Response Time: {result['response_time']:.3f}s")

	    return tuned_model

	def run_comprehensive_evaluation():
	    """Run the comprehensive evaluation and hand back its results.

	    Returns:
	        Whatever ``ComprehensiveEvaluator.run_comprehensive_evaluation()``
	        returns, unmodified.
	    """
	    logger.info("Starting comprehensive evaluation...")
	    outcome = ComprehensiveEvaluator().run_comprehensive_evaluation()
	    logger.info("Comprehensive evaluation complete!")
	    return outcome

	def run_streamlit_interface():
	    """Launch the Streamlit UI defined in ``src/interface.py``.

	    Runs ``streamlit run interface.py`` as a child process whose working
	    directory is the ``src`` folder next to this file, and blocks until the
	    child exits. The working directory is set on the child only (``cwd=``),
	    so the working directory of the current process is left untouched.

	    A non-zero exit of Streamlit, or a missing ``streamlit`` executable, is
	    logged as an error rather than raised.

	    Raises:
	        FileNotFoundError: If the ``src`` directory does not exist.
	    """
	    logger.info("Starting Streamlit interface...")

	    import subprocess

	    src_dir = Path(__file__).parent / "src"
	    # Validate up front: a missing cwd also surfaces from subprocess as
	    # FileNotFoundError and would be misreported as "Streamlit not found".
	    if not src_dir.is_dir():
	        raise FileNotFoundError(f"Source directory not found: {src_dir}")

	    # Run streamlit
	    cmd = ["streamlit", "run", "interface.py"]
	    logger.info(f"Running: {' '.join(cmd)}")

	    try:
	        # check=True turns a non-zero exit status into CalledProcessError.
	        subprocess.run(cmd, check=True, cwd=str(src_dir))
	    except subprocess.CalledProcessError as e:
	        logger.error(f"Streamlit failed: {e}")
	    except FileNotFoundError:
	        logger.error("Streamlit not found. Please install with: pip install streamlit")

	def main():
	    """Parse command-line arguments and run the selected mode.

	    Modes:
	        data       -- data processing only
	        rag        -- data processing, then the RAG demo
	        fine-tune  -- data processing, then fine-tuning and its demo
	        evaluate   -- comprehensive evaluation
	        interface  -- Streamlit interface
	        all        -- data processing, RAG, fine-tuning, then evaluation

	    The ``--output-dir`` directory is created (including missing parents)
	    before the mode runs. ``--verbose`` / ``-v`` lowers the root logger to
	    DEBUG.

	    Any exception raised while running a mode is logged together with its
	    traceback, and the process exits with status 1.
	    """
	    parser = argparse.ArgumentParser(
	        description="Financial QA System: RAG vs Fine-tuning Comparison"
	    )

	    parser.add_argument(
	        "mode",
	        choices=["data", "rag", "fine-tune", "evaluate", "interface", "all"],
	        help="Mode to run"
	    )

	    parser.add_argument(
	        "--output-dir",
	        default="output",
	        help="Output directory for results"
	    )

	    parser.add_argument(
	        "--verbose", "-v",
	        action="store_true",
	        help="Enable verbose logging"
	    )

	    args = parser.parse_args()

	    if args.verbose:
	        logging.getLogger().setLevel(logging.DEBUG)

	    # Create output directory; parents=True so nested paths such as
	    # "results/run1" work even when the intermediate directory is missing.
	    # NOTE(review): the directory is created here but not passed on to any
	    # of the modes below.
	    output_path = Path(args.output_dir)
	    output_path.mkdir(parents=True, exist_ok=True)

	    try:
	        if args.mode == "data":
	            run_data_processing()

	        elif args.mode == "rag":
	            # First process data, then run RAG
	            _, _, chunks = run_data_processing()
	            run_rag_system(chunks)

	        elif args.mode == "fine-tune":
	            # First process data, then run fine-tuning
	            _, qa_pairs, _ = run_data_processing()
	            run_fine_tuned_system(qa_pairs)

	        elif args.mode == "evaluate":
	            run_comprehensive_evaluation()

	        elif args.mode == "interface":
	            run_streamlit_interface()

	        elif args.mode == "all":
	            # Run complete pipeline
	            logger.info("Running complete pipeline...")

	            # 1. Data processing
	            _, qa_pairs, chunks = run_data_processing()

	            # 2. RAG system
	            run_rag_system(chunks)

	            # 3. Fine-tuned system
	            run_fine_tuned_system(qa_pairs)

	            # 4. Comprehensive evaluation
	            run_comprehensive_evaluation()

	            logger.info("Complete pipeline finished successfully!")

	        logger.info(f"Mode '{args.mode}' completed successfully!")

	    except Exception as e:
	        # Top-level boundary: keep the traceback in the log so the failure
	        # can be diagnosed, then signal failure through the exit status.
	        logger.exception(f"Error in mode '{args.mode}': {e}")
	        sys.exit(1)

	# Script entry point: run the command-line interface only when this file is
	# executed directly, not when it is imported as a module.
	if __name__ == "__main__":
	    main()