# Hugging Face Space: navyamehta/mini-rag (revision 33f5651; file size: 6,884 bytes)

import os
import time
import gradio as gr
from dotenv import load_dotenv

from ingest import ingest
from rag_core import RAGCore

# Load variables from a local .env file into the process environment before
# the RAG core is constructed (presumably it reads API keys from there —
# confirm against rag_core).
load_dotenv()

# Single shared RAG pipeline instance, created at import time and used by
# answer_query for retrieval and answer generation.
rag = RAGCore()


def run_ingest(data_dir: str) -> str:
    """Ingest documents from a directory and describe the outcome as text.

    When *data_dir* is empty, the directory is taken from the ``DATA_DIR``
    environment variable, defaulting to ``./data``. Any exception raised
    along the way is turned into a failure message instead of propagating,
    so the UI always receives a status string.
    """
    try:
        target_dir = data_dir if data_dir else os.getenv("DATA_DIR", "./data")
        chunk_total = ingest(data_dir=target_dir)
        return f"Ingestion complete. Chunks ingested: {chunk_total}"
    except Exception as exc:
        return f"Ingestion failed: {exc}"


def process_text_input(text: str, chunk_size: int, chunk_overlap: int) -> str:
    """Ingest pasted text into the vector store and report the outcome.

    The text is written to a scratch file (``./temp_upload/user_input.txt``)
    because ``ingest`` is called with a directory path. The scratch file and
    directory are removed afterwards whether or not ingestion succeeds.

    Args:
        text: Raw document text from the UI. ``None``, empty, or
            whitespace-only input is rejected without touching the disk.
        chunk_size: Chunk size forwarded to ``ingest`` (cast to ``int``).
        chunk_overlap: Chunk overlap forwarded to ``ingest`` (cast to ``int``).

    Returns:
        A human-readable status line. Failures are reported in the returned
        string rather than raised.
    """
    try:
        if not text or not text.strip():
            return "No text provided"

        temp_dir = "./temp_upload"
        temp_file = os.path.join(temp_dir, "user_input.txt")
        os.makedirs(temp_dir, exist_ok=True)

        try:
            with open(temp_file, "w", encoding="utf-8") as f:
                f.write(text)

            # Gradio sliders may hand the callback floats; the chunking
            # parameters are integer quantities, so normalise them here.
            count = ingest(
                data_dir=temp_dir,
                chunk_size=int(chunk_size),
                chunk_overlap=int(chunk_overlap),
            )
        finally:
            # Best-effort cleanup that also runs when ingestion fails, so no
            # stale upload is left behind. A cleanup problem (e.g. the
            # directory is not empty) must not mask the ingestion result or
            # the original error, hence the deliberate OSError suppression.
            for remove, path in ((os.remove, temp_file), (os.rmdir, temp_dir)):
                try:
                    remove(path)
                except OSError:
                    pass

        return f"Text processed and stored: {count} chunks created"
    except Exception as e:
        return f"Text processing failed: {e}"


def _estimate_tokens(text: str) -> float:
    """Return a very rough token estimate: whitespace-separated words x 1.3."""
    return len(text.split()) * 1.3


def _format_sources(docs) -> str:
    """Render retrieved docs as numbered citation lines, one per rendered line."""
    lines = []
    for number, doc in enumerate(docs, start=1):
        # Tolerate a missing or None metadata entry instead of failing the
        # whole answer over one malformed document.
        metadata = doc.get("metadata") or {}
        line = f"[{number}] {metadata.get('source', 'Unknown')}"
        if "rerank_score" in doc:
            line += f" (rerank: {doc['rerank_score']:.3f})"
        else:
            line += f" (score: {doc.get('score', 0):.3f})"
        lines.append(line)
    # Two trailing spaces force a Markdown hard line break; a bare "\n" is a
    # soft break and would collapse all sources onto a single rendered line.
    return "  \n".join(lines)


def answer_query(query: str, top_k: int, use_reranker: bool):
    """Answer a question from the stored documents, with sources and metrics.

    Retrieves (and optionally reranks) context via the module-level ``rag``
    object, generates an answer with inline citations, and appends rough
    timing, token, and cost estimates.

    Args:
        query: The user's question. ``None``, empty, or whitespace-only input
            is rejected before any retrieval or generation call is made.
        top_k: Number of documents to retrieve (cast to ``int`` because UI
            sliders may deliver floats).
        use_reranker: Whether to rerank retrieved documents; also controls
            whether a rerank cost is included in the estimate.

    Returns:
        A ``(answer_markdown, sources_markdown)`` tuple of strings. On any
        failure the first element is ``"Error: ..."`` and the second is empty;
        exceptions are not propagated.
    """
    try:
        if not query or not query.strip():
            return "Please enter a question.", ""

        # perf_counter is monotonic, so the measured duration cannot be
        # skewed by system clock adjustments.
        started = time.perf_counter()

        # Retrieve and rerank
        docs, contexts = rag.retrieve(query, top_k=int(top_k), rerank=use_reranker)

        # Generate answer with inline citations
        answer = rag.generate_with_citations(query, contexts)

        processing_time = time.perf_counter() - started

        # Rough token estimates (very approximate)
        query_tokens = _estimate_tokens(query)
        context_tokens = sum(_estimate_tokens(c) for c in contexts)
        answer_tokens = _estimate_tokens(answer)

        # Cost estimates (rough, based on typical pricing)
        embedding_cost = (query_tokens + context_tokens) * 0.0001 / 1000  # $0.0001 per 1K tokens
        llm_cost = answer_tokens * 0.00003 / 1000  # $0.00003 per 1K tokens for GPT-4o-mini
        rerank_cost = len(contexts) * 0.0001 if use_reranker else 0  # $0.0001 per document
        total_cost = embedding_cost + llm_cost + rerank_cost

        # The metric lines end in two spaces (Markdown hard break) so each
        # metric is rendered on its own line instead of one run-on paragraph.
        answer_with_meta = (
            f"{answer}\n\n---\n"
            f"**Processing Time:** {processing_time:.2f}s  \n"
            f"**Estimated Cost:** ${total_cost:.6f}  \n"
            f"**Tokens:** Query: {query_tokens:.0f}, "
            f"Context: {context_tokens:.0f}, Answer: {answer_tokens:.0f}"
        )

        return answer_with_meta, _format_sources(docs)
    except Exception as e:
        return f"Error: {e}", ""


def build_ui() -> gr.Blocks:
    """Assemble the Gradio Blocks interface and return it without launching.

    The layout has an introductory Markdown header followed by four tabs:
    "Text Input", "File Ingestion", "Query", and "Evaluation". The first
    three wire a button to ``process_text_input``, ``run_ingest``, and
    ``answer_query`` respectively; the last one is static text.

    Returns:
        The constructed ``gr.Blocks`` object; the caller is responsible for
        calling ``launch()`` on it.
    """
    with gr.Blocks(title="Mini RAG - Track B Assessment") as demo:
        # Static header shown above all tabs.
        gr.Markdown("""
        ## Mini RAG - Track B Assessment
        **Goal:** Build and host a small RAG app with text input, vector storage, retrieval + reranking, and LLM answering with citations.
        
        ### Features:
        - **Text Input/Upload:** Paste text or upload files (.txt, .md, .pdf)
        - **Vector Storage:** Pinecone cloud-hosted vector database
        - **Retrieval + Reranking:** Top-k retrieval with optional Cohere reranker
        - **LLM Answering:** OpenAI/Groq with inline citations [1], [2]
        - **Metrics:** Request timing and cost estimates
        """)

        # Tab 1: paste raw text; the button passes the text and both slider
        # values to process_text_input and shows its status string.
        with gr.Tab("Text Input"):
            gr.Markdown("### Process Text Input")
            text_input = gr.Textbox(label="Paste your text here", lines=10, placeholder="Enter or paste your document text here...")
            chunk_size = gr.Slider(400, 1200, value=800, step=100, label="Chunk Size (tokens)")
            chunk_overlap = gr.Slider(50, 200, value=120, step=10, label="Chunk Overlap (tokens)")
            process_btn = gr.Button("Process & Store Text")
            process_out = gr.Textbox(label="Status")
            process_btn.click(process_text_input, inputs=[text_input, chunk_size, chunk_overlap], outputs=[process_out])

        # Tab 2: ingest files from a directory path. The textbox default is
        # read from DATA_DIR (falling back to "./data") when the UI is built.
        with gr.Tab("File Ingestion"):
            gr.Markdown("### Ingest Files from Directory")
            data_dir = gr.Textbox(label="Data directory", value=os.getenv("DATA_DIR", "./data"))
            ingest_btn = gr.Button("Run Ingestion")
            ingest_out = gr.Textbox(label="Status")
            ingest_btn.click(run_ingest, inputs=[data_dir], outputs=[ingest_out])

        # Tab 3: ask a question. answer_query returns two values, which are
        # routed to the answer and sources Markdown components in that order.
        with gr.Tab("Query"):
            gr.Markdown("### Ask Questions")
            query = gr.Textbox(label="Question", lines=3, placeholder="Ask a question about your stored documents...")
            top_k = gr.Slider(1, 20, value=5, step=1, label="Top K retrieval")
            use_reranker = gr.Checkbox(value=True, label="Use reranker (Cohere)")
            submit = gr.Button("Ask Question")
            answer = gr.Markdown(label="Answer with Citations")
            sources = gr.Markdown(label="Sources")
            submit.click(answer_query, inputs=[query, top_k, use_reranker], outputs=[answer, sources])

        # Tab 4: static reference text only; no callbacks are attached.
        with gr.Tab("Evaluation"):
            gr.Markdown("""
            ### Evaluation Examples (Gold Set)
            
            **Sample Q&A pairs for testing:**
            
            1. **Q:** What is the main topic of the document?
               **Expected:** Clear identification of document subject
               
            2. **Q:** What are the key findings or conclusions?
               **Expected:** Specific facts or conclusions from the text
               
            3. **Q:** What methodology was used?
               **Expected:** Description of approach or methods mentioned
               
            4. **Q:** What are the limitations discussed?
               **Expected:** Any limitations or constraints mentioned
               
            5. **Q:** What future work is suggested?
               **Expected:** Recommendations or future directions
               
            **Success Metrics:**
            - **Precision:** Relevant information in answers
            - **Recall:** Coverage of available information
            - **Citation Accuracy:** Proper source attribution
            """)

    return demo


if __name__ == "__main__":
    # Build the interface and start the Gradio server only when this file is
    # executed as a script, so importing the module does not launch the app.
    ui = build_ui()
    ui.launch()