# Ahmed-Alghamdi's picture
# Update main.py
# 8f0c11b verified
# main.py
import os
import sys
from document_processor import load_single_document
from embedding_generator import generate_embeddings
from search_engine import SearchEngine
from response_generator import ResponseGenerator
from config import Config
from utils import setup_logger
# Module-level logger named 'main', created through the project's
# setup_logger helper and used by every function in this file.
logger = setup_logger('main')
def initialize_system():
    """Initialize the search and response system.

    Loads the document named by ``Config.DOCUMENT_FILE``, generates
    embeddings for it, and builds the search engine and response generator.

    Returns:
        tuple: ``(search_engine, response_generator)``.

    Raises:
        FileNotFoundError: If the loader returned nothing or an empty result.
        ValueError: If the generated embeddings contain zero rows.
    """
    logger.info("Initializing the system...")
    logger.info(f"Configuration: TOP_K={Config.TOP_K}, MIN_SIMILARITY={Config.MIN_SIMILARITY_SCORE}")

    # Load and process documents.
    # NOTE(review): the result is used like a pandas DataFrame
    # (.empty / .columns) -- confirm against load_single_document.
    documents = load_single_document(Config.DOCUMENT_FILE)

    # Fail early with a clear message instead of crashing further down.
    # The None check covers a loader that signals "nothing loaded" with None
    # rather than an empty frame.
    if documents is None or documents.empty:
        raise FileNotFoundError(f"❌ No document found at: {Config.DOCUMENT_FILE}. "
                                f"Make sure the .txt file is uploaded in Hugging Face.")

    # Log document statistics; chunked documents carry a 'chunk_id' column.
    num_chunks = len(documents)
    if 'chunk_id' in documents.columns:
        logger.info(f"Loaded document with {num_chunks} chunks")
        if 'content_length' in documents.columns:
            avg_length = documents['content_length'].mean()
            logger.info(f"Average chunk size: {avg_length:.0f} characters")
    else:
        logger.info(f"Loaded {num_chunks} documents")

    # Generate embeddings
    embeddings = generate_embeddings(documents)
    logger.info(f"Generated embeddings of shape {embeddings.shape}")

    # Refuse to build the search index from an empty embedding matrix.
    if embeddings.shape[0] == 0:
        raise ValueError("❌ Embeddings are empty! Document might be empty or path is wrong.")

    # Initialize search engine
    search_engine = SearchEngine(documents, embeddings)
    logger.info("Search engine initialized")

    # Initialize response generator
    response_generator = ResponseGenerator()
    logger.info("Response generator initialized")

    logger.info("✅ System ready for queries!")
    return search_engine, response_generator
def process_query(query, search_engine, response_generator):
    """Answer one query: search for matching documents, then generate a reply.

    Args:
        query: The query text to search for.
        search_engine: Object whose ``search(query)`` returns the matches.
        response_generator: Object whose ``generate_response(query, matches)``
            returns the reply.

    Returns:
        The value returned by ``response_generator.generate_response``.
    """
    logger.info(f"Processing query: '{query}'")

    hits = search_engine.search(query)
    hit_count = len(hits)

    # Report what the search produced before generating the reply.
    if hit_count == 0:
        logger.warning("⚠️ No relevant documents found above similarity threshold")
    elif 'similarity_score' in hits.columns:
        similarity = hits['similarity_score'].values
        logger.info(f"Found {hit_count} relevant chunks (similarity: {similarity.min():.2f} - {similarity.max():.2f})")
    else:
        logger.info(f"Found {hit_count} relevant documents")

    answer = response_generator.generate_response(query, hits)
    logger.info(f"Generated response ({len(answer)} characters)")
    return answer
def interactive_mode(search_engine, response_generator):
    """Run the system in interactive mode, processing queries from user input.

    Reads queries from standard input until the user types ``quit``
    (case-insensitive), standard input is closed, or the user presses Ctrl-C.
    Blank input is ignored rather than being sent to the search engine.

    Args:
        search_engine: Search engine passed through to ``process_query``.
        response_generator: Response generator passed through to
            ``process_query``.
    """
    print("Enter your queries. Type 'quit' to exit.")
    while True:
        try:
            query = input("Query: ").strip()
        except (EOFError, KeyboardInterrupt):
            # stdin was closed (Ctrl-D / end of piped input) or the user hit
            # Ctrl-C: end the session cleanly instead of dumping a traceback.
            print()
            break
        if not query:
            # Nothing typed; prompt again instead of searching for "".
            continue
        if query.lower() == 'quit':
            break
        response = process_query(query, search_engine, response_generator)
        print(f"Response: {response}\n")
def batch_mode(input_file, output_file, search_engine, response_generator):
    """Process queries from an input file and write responses to an output file.

    Each non-blank line of ``input_file`` is treated as one query. For every
    query a ``Query: ... / Response: ...`` record followed by a blank line is
    written to ``output_file``, which is overwritten if it already exists.
    Both files are read and written as UTF-8.

    Args:
        input_file: Path of the text file containing one query per line.
        output_file: Path of the file the results are written to.
        search_engine: Search engine passed through to ``process_query``.
        response_generator: Response generator passed through to
            ``process_query``.
    """
    with open(input_file, 'r', encoding='utf-8') as infile, \
            open(output_file, 'w', encoding='utf-8') as outfile:
        for line in infile:
            query = line.strip()
            if not query:
                # Blank or whitespace-only lines (e.g. a trailing newline)
                # are not queries; skip them instead of searching for "".
                continue
            response = process_query(query, search_engine, response_generator)
            outfile.write(f"Query: {query}\nResponse: {response}\n\n")
    logger.info(f"Batch processing completed. Results written to {output_file}")
def main():
    """Command-line entry point.

    Usage:
        python main.py                                  (interactive mode)
        python main.py --batch input_file output_file   (batch mode)

    Command-line arguments are validated before the system is initialized,
    so a usage error is reported immediately instead of after the document
    has been loaded and embedded. Exits with status 1 on invalid arguments.
    """
    args = sys.argv[1:]
    batch_files = None

    if args:
        if args[0] != '--batch':
            print("Unknown argument. Use --batch for batch mode or no arguments for interactive mode.")
            sys.exit(1)
        if len(args) != 3:
            print("Usage for batch mode: python main.py --batch input_file output_file")
            sys.exit(1)
        batch_files = (args[1], args[2])

    # Only pay for initialization once the arguments are known to be valid.
    search_engine, response_generator = initialize_system()

    if batch_files is not None:
        input_file, output_file = batch_files
        batch_mode(input_file, output_file, search_engine, response_generator)
    else:
        interactive_mode(search_engine, response_generator)


# Run the CLI only when this file is executed as a script, not when imported.
if __name__ == "__main__":
    main()