"""Gradio front end for the Mini RAG demo.

Exposes four tabs: pasted-text ingestion, directory ingestion, question
answering with citations, and a static list of evaluation prompts.
"""

import os
import tempfile
import time
from typing import Tuple

import gradio as gr
from dotenv import load_dotenv

from ingest import ingest
from rag_core import RAGCore

load_dotenv()

# Created once at import time; used by answer_query for retrieval and generation.
rag = RAGCore()

# Rough words-to-tokens multiplier used for the displayed estimates.
_TOKENS_PER_WORD = 1.3
# Rough pricing figures used only for the on-screen cost estimate.
_EMBEDDING_COST_PER_1K_TOKENS = 0.0001
_LLM_COST_PER_1K_TOKENS = 0.00003
_RERANK_COST_PER_DOC = 0.0001

_INTRO_MARKDOWN = """
## Mini RAG - Track B Assessment

**Goal:** Build and host a small RAG app with text input, vector storage, retrieval + reranking, and LLM answering with citations.

### Features:
- **Text Input/Upload:** Paste text or upload files (.txt, .md, .pdf)
- **Vector Storage:** Pinecone cloud-hosted vector database
- **Retrieval + Reranking:** Top-k retrieval with optional Cohere reranker
- **LLM Answering:** OpenAI/Groq with inline citations [1], [2]
- **Metrics:** Request timing and cost estimates
"""

_EVALUATION_MARKDOWN = """
### Evaluation Examples (Gold Set)

**Sample Q&A pairs for testing:**

1. **Q:** What is the main topic of the document?
   **Expected:** Clear identification of document subject

2. **Q:** What are the key findings or conclusions?
   **Expected:** Specific facts or conclusions from the text

3. **Q:** What methodology was used?
   **Expected:** Description of approach or methods mentioned

4. **Q:** What are the limitations discussed?
   **Expected:** Any limitations or constraints mentioned

5. **Q:** What future work is suggested?
   **Expected:** Recommendations or future directions

**Success Metrics:**
- **Precision:** Relevant information in answers
- **Recall:** Coverage of available information
- **Citation Accuracy:** Proper source attribution
"""


def run_ingest(data_dir: str) -> str:
    """Ingest every document under *data_dir* and return a status message.

    An empty *data_dir* falls back to the ``DATA_DIR`` environment variable,
    then to ``./data``. Failures are reported in the returned string rather
    than raised, so the UI always has something to display.
    """
    target_dir = data_dir or os.getenv("DATA_DIR", "./data")
    try:
        count = ingest(data_dir=target_dir)
    except Exception as e:  # UI boundary: show the failure instead of crashing
        return f"Ingestion failed: {e}"
    return f"Ingestion complete. Chunks ingested: {count}"


def process_text_input(text: str, chunk_size: int, chunk_overlap: int) -> str:
    """Ingest pasted text and return a status message.

    The text is written to ``user_input.txt`` inside a private temporary
    directory which is handed to ``ingest``. The directory is unique per call,
    so concurrent requests cannot overwrite or delete each other's file, and
    it is removed even when ingestion fails.

    Args:
        text: Raw text from the UI; ``None`` or whitespace-only is rejected.
        chunk_size: Chunk size forwarded to ``ingest``.
        chunk_overlap: Chunk overlap forwarded to ``ingest``.

    Returns:
        A human-readable success or failure message; never raises.
    """
    if not text or not text.strip():
        return "No text provided"

    try:
        # NOTE(review): if ingest records the file path as the document source,
        # citations will now show a temporary path instead of ./temp_upload.
        with tempfile.TemporaryDirectory(prefix="rag_upload_") as temp_dir:
            temp_file = os.path.join(temp_dir, "user_input.txt")
            with open(temp_file, "w", encoding="utf-8") as f:
                f.write(text)

            # Slider values may arrive as floats; ingest is called with ints.
            count = ingest(
                data_dir=temp_dir,
                chunk_size=int(chunk_size),
                chunk_overlap=int(chunk_overlap),
            )
    except Exception as e:  # UI boundary: show the failure instead of crashing
        return f"Text processing failed: {e}"
    return f"Text processed and stored: {count} chunks created"


def _estimate_tokens(text: str) -> float:
    """Return a very rough token count: whitespace-separated words times 1.3."""
    return len(text.split()) * _TOKENS_PER_WORD


def _format_sources(docs) -> str:
    """Render one numbered line per retrieved doc, with its rerank or retrieval score."""
    lines = []
    for number, doc in enumerate(docs, start=1):
        # Tolerate docs without a metadata entry instead of failing the whole answer.
        source = (doc.get("metadata") or {}).get("source", "Unknown")
        if "rerank_score" in doc:
            score_text = f" (rerank: {doc['rerank_score']:.3f})"
        else:
            score_text = f" (score: {doc.get('score', 0):.3f})"
        lines.append(f"[{number}] {source}{score_text}")
    return "\n".join(lines)


def answer_query(query: str, top_k: int, use_reranker: bool) -> Tuple[str, str]:
    """Answer *query* from the vector store and report timing and cost estimates.

    Args:
        query: The user's question; empty or whitespace-only input is rejected
            without calling the retrieval or generation backends.
        top_k: Number of documents to retrieve.
        use_reranker: Whether retrieval should rerank its results.

    Returns:
        ``(answer_markdown, sources_markdown)``. On failure the first element
        is an ``"Error: ..."`` message and the second is empty; never raises.
    """
    if not query or not query.strip():
        return "Error: Please enter a question.", ""

    try:
        # perf_counter is monotonic, so the elapsed time cannot go negative
        # or jump if the system clock is adjusted mid-request.
        started = time.perf_counter()

        docs, contexts = rag.retrieve(query, top_k=int(top_k), rerank=use_reranker)
        answer = rag.generate_with_citations(query, contexts)

        processing_time = time.perf_counter() - started

        # Rough token estimates (very approximate).
        query_tokens = _estimate_tokens(query)
        context_tokens = sum(_estimate_tokens(c) for c in contexts)
        answer_tokens = _estimate_tokens(answer)

        # Rough cost estimates derived from the module-level pricing constants.
        embedding_cost = (query_tokens + context_tokens) * _EMBEDDING_COST_PER_1K_TOKENS / 1000
        llm_cost = answer_tokens * _LLM_COST_PER_1K_TOKENS / 1000
        rerank_cost = len(contexts) * _RERANK_COST_PER_DOC if use_reranker else 0
        total_cost = embedding_cost + llm_cost + rerank_cost

        sources_text = _format_sources(docs)

        answer_with_meta = (
            f"{answer}\n\n---\n"
            f"**Processing Time:** {processing_time:.2f}s\n"
            f"**Estimated Cost:** ${total_cost:.6f}\n"
            f"**Tokens:** Query: {query_tokens:.0f}, "
            f"Context: {context_tokens:.0f}, Answer: {answer_tokens:.0f}"
        )
        return answer_with_meta, sources_text
    except Exception as e:  # UI boundary: show the failure instead of crashing
        return f"Error: {e}", ""


def build_ui() -> gr.Blocks:
    """Build the Gradio Blocks app and wire each button to its handler."""
    with gr.Blocks(title="Mini RAG - Track B Assessment") as demo:
        gr.Markdown(_INTRO_MARKDOWN)

        with gr.Tab("Text Input"):
            gr.Markdown("### Process Text Input")
            text_input = gr.Textbox(
                label="Paste your text here",
                lines=10,
                placeholder="Enter or paste your document text here...",
            )
            chunk_size = gr.Slider(400, 1200, value=800, step=100, label="Chunk Size (tokens)")
            chunk_overlap = gr.Slider(50, 200, value=120, step=10, label="Chunk Overlap (tokens)")
            process_btn = gr.Button("Process & Store Text")
            process_out = gr.Textbox(label="Status")
            process_btn.click(
                process_text_input,
                inputs=[text_input, chunk_size, chunk_overlap],
                outputs=[process_out],
            )

        with gr.Tab("File Ingestion"):
            gr.Markdown("### Ingest Files from Directory")
            data_dir = gr.Textbox(label="Data directory", value=os.getenv("DATA_DIR", "./data"))
            ingest_btn = gr.Button("Run Ingestion")
            ingest_out = gr.Textbox(label="Status")
            ingest_btn.click(run_ingest, inputs=[data_dir], outputs=[ingest_out])

        with gr.Tab("Query"):
            gr.Markdown("### Ask Questions")
            query = gr.Textbox(
                label="Question",
                lines=3,
                placeholder="Ask a question about your stored documents...",
            )
            top_k = gr.Slider(1, 20, value=5, step=1, label="Top K retrieval")
            use_reranker = gr.Checkbox(value=True, label="Use reranker (Cohere)")
            submit = gr.Button("Ask Question")
            answer = gr.Markdown(label="Answer with Citations")
            sources = gr.Markdown(label="Sources")
            submit.click(
                answer_query,
                inputs=[query, top_k, use_reranker],
                outputs=[answer, sources],
            )

        with gr.Tab("Evaluation"):
            gr.Markdown(_EVALUATION_MARKDOWN)

    return demo


if __name__ == "__main__":
    ui = build_ui()
    ui.launch()