File size: 3,627 Bytes
40f6dcf
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
"""
Basic Example - Text Processing

Simple example demonstrating basic RAG usage for text documents.
"""

import asyncio
from config.pipeline_configs.rag_pipeline import RAGPipeline


async def main() -> None:
    """Run the basic text-processing example end to end.

    Builds a RAGPipeline with a hybrid retrieval strategy, ingests four
    sample documents using semantic chunking, runs a handful of example
    queries, and prints pipeline statistics.  Intended purely as a demo;
    raises whatever the underlying pipeline raises on failure.
    """
    import textwrap  # local import: only needed for display truncation below

    print("Basic Text Processing Example")
    print("=" * 50)

    # Initialize RAG pipeline with hybrid retrieval and a confidence floor.
    pipeline = RAGPipeline(
        {"retrieval": {"strategy": "hybrid", "top_k": 5}, "generation": {"min_confidence": 0.7}}
    )

    # Sample documents covering core RAG concepts.
    documents = [
        {
            "document_id": "doc_001",
            "content": "RAG (Retrieval-Augmented Generation) is a technique in natural language processing (NLP) that retrieves relevant information from external sources and uses it to generate more accurate and contextually appropriate responses.",
            "metadata": {"title": "RAG Overview", "source": "example", "topic": "NLP"},
        },
        {
            "document_id": "doc_002",
            "content": "Vector databases are specialized databases designed to store, manage, and query vector embeddings. They enable efficient similarity search and retrieval in high-dimensional vector spaces, which is crucial for RAG systems.",
            "metadata": {"title": "Vector Databases", "source": "example", "topic": "Databases"},
        },
        {
            "document_id": "doc_003",
            "content": "Embeddings are numerical representations of text that capture semantic meaning. Modern embedding models like BERT, GPT, and sentence-transformers produce dense vector representations that can be used for semantic similarity search.",
            "metadata": {"title": "Embeddings", "source": "example", "topic": "ML"},
        },
        {
            "document_id": "doc_004",
            "content": "Chunking strategies for documents include fixed-size chunking (splitting text into equal-sized pieces), semantic chunking (splitting at natural boundaries like sentences or paragraphs), and token-based chunking (splitting based on token count for language models).",
            "metadata": {
                "title": "Chunking Strategies",
                "source": "example",
                "topic": "Text Processing",
            },
        },
    ]

    # Ingest documents (semantic chunking splits at natural text boundaries).
    print("\n1. Ingesting documents...")
    ingestion_result = await pipeline.ingest(documents, chunk_strategy="semantic")
    print(
        f"   Ingested {ingestion_result['successful']}/{ingestion_result['total_documents']} documents"
    )
    print(f"   Created {ingestion_result['total_chunks']} chunks")

    # Sample queries exercising each ingested topic.
    queries = [
        "What is RAG?",
        "How do vector databases work?",
        "What are embeddings?",
        "What are chunking strategies?",
        "How does semantic chunking work?",
    ]

    # Process queries and report answer, confidence, sources, and latency.
    print("\n2. Processing queries...")
    for i, query in enumerate(queries, 1):
        print(f"   Query {i}: {query}")

        result = await pipeline.query(
            query=query, top_k=5, include_sources=True, include_confidence=True
        )

        # Fix: only append an ellipsis when the answer was actually truncated;
        # the original unconditionally printed "..." after answer[:100].
        answer_preview = textwrap.shorten(result.answer, width=103, placeholder="...")
        print(f"   Answer: {answer_preview}")
        print(f"   Confidence: {result.confidence:.2%}")
        print(f"   Sources: {len(result.sources)} retrieved")
        print(f"   Time: {result.total_time_ms:.2f}ms")
        print()

    # Get pipeline stats (presumably cumulative counters; keys depend on pipeline impl).
    print("\n3. Pipeline Statistics:")
    stats = pipeline.get_stats()
    for key, value in stats.items():
        print(f"   {key}: {value}")

    print("\n" + "=" * 50)
    print("Example completed successfully!")


# Script entry point: run the async example under a fresh asyncio event loop.
if __name__ == "__main__":
    asyncio.run(main())