HindiRAG / main.py
hardkpentium101's picture
update app.py
78430b6
import os
import sys
from pathlib import Path
# Add src directory to path
sys.path.append(os.path.join(os.path.dirname(__file__), 'src'))
from qdrant_setup import QdrantSetup
from document_ingestor import DocumentIngestor
from load_huggingface_dataset import HuggingFaceDatasetLoader
from embedding_generator import get_embedding_function
def setup_and_run():
    """Prepare the data side of the Hindi RAG system and report progress.

    Steps, in order:
      1. Build a ``QdrantSetup`` and call ``create_collection()`` on it.
      2. Ingest the configured HuggingFace datasets through
         ``HuggingFaceDatasetLoader.load_and_ingest_all_configured``.
      3. Ingest local files from ``./data`` (relative to the current
         working directory) through ``DocumentIngestor.load_and_ingest``;
         a warning is printed instead when that path is missing.
      4. Print the LLM provider named by the ``LLM_PROVIDER`` environment
         variable (default ``"huggingface"``).

    Nothing is returned. This function does not construct the RAG system
    or launch the frontend itself; it only prints messages about them.
    """
    print("Setting up Hindi RAG System...")

    # Step 1: Initialize Qdrant
    print("\n1. Setting up Qdrant...")
    setup = QdrantSetup()
    setup.create_collection()

    # Step 2: Load and ingest documents from HuggingFace datasets
    print("\n2. Loading datasets from HuggingFace...")
    client, collection = setup.get_client(), setup.get_collection_name()
    embed = get_embedding_function()
    num_hf_docs = HuggingFaceDatasetLoader(
        client, collection
    ).load_and_ingest_all_configured(embed)

    if num_hf_docs > 0:
        print(f" ✓ Loaded {num_hf_docs} new documents from HuggingFace datasets")
    elif os.environ.get("HF_DATASETS", ""):
        # Datasets are configured but the loader reported nothing new.
        print(f" ⊘ HuggingFace datasets already ingested (skipped)")
    else:
        print(" No HuggingFace datasets configured (set HF_DATASETS env variable)")

    # Step 3: Load and ingest local documents
    print("\n3. Loading and ingesting local documents...")
    local_ingestor = DocumentIngestor(client, collection)
    data_dir = "./data"
    if not os.path.exists(data_dir):
        print(f" Warning: Data directory '{data_dir}' does not exist")
    else:
        num_docs = local_ingestor.load_and_ingest(data_dir, embed)
        print(f" Loaded {num_docs} local documents into Qdrant")

    # Step 4: Report the LLM provider; the RAG system itself is not built here
    print("\n4. Initializing RAG system...")
    llm_provider = os.environ.get("LLM_PROVIDER", "huggingface")
    print(f" Using LLM provider: {llm_provider}")
    print(" (RAG system will be initialized by frontend on first request)")

    print("\n5. Hindi RAG System setup complete!")
    print(" Starting Streamlit frontend...")
# Run the setup only when this file is executed as a script, not on import.
if __name__ == "__main__":
    setup_and_run()