# mini-rag / app.py
# (Originally uploaded to a Hugging Face Space by navyamehta, commit 33f5651.)
import contextlib
import os
import time

import gradio as gr
from dotenv import load_dotenv

from ingest import ingest
from rag_core import RAGCore
load_dotenv()
rag = RAGCore()
def run_ingest(data_dir: str) -> str:
try:
count = ingest(data_dir=data_dir or os.getenv("DATA_DIR", "./data"))
return f"Ingestion complete. Chunks ingested: {count}"
except Exception as e:
return f"Ingestion failed: {e}"
def process_text_input(text: str, chunk_size: int, chunk_overlap: int) -> str:
"""Process uploaded/pasted text and store in vector DB"""
try:
if not text.strip():
return "No text provided"
# Create temporary file for ingestion
temp_dir = "./temp_upload"
os.makedirs(temp_dir, exist_ok=True)
temp_file = os.path.join(temp_dir, "user_input.txt")
with open(temp_file, "w", encoding="utf-8") as f:
f.write(text)
# Ingest the text
count = ingest(data_dir=temp_dir, chunk_size=chunk_size, chunk_overlap=chunk_overlap)
# Clean up
os.remove(temp_file)
os.rmdir(temp_dir)
return f"Text processed and stored: {count} chunks created"
except Exception as e:
return f"Text processing failed: {e}"
def answer_query(query: str, top_k: int, use_reranker: bool):
try:
start_time = time.time()
# Retrieve and rerank
docs, contexts = rag.retrieve(query, top_k=top_k, rerank=use_reranker)
# Generate answer with inline citations
answer = rag.generate_with_citations(query, contexts)
# Calculate timing and estimates
end_time = time.time()
processing_time = end_time - start_time
# Rough token estimates (very approximate)
query_tokens = len(query.split()) * 1.3 # rough tokenization
context_tokens = sum(len(c.split()) * 1.3 for c in contexts)
answer_tokens = len(answer.split()) * 1.3
# Cost estimates (rough, based on typical pricing)
embedding_cost = (query_tokens + context_tokens) * 0.0001 / 1000 # $0.0001 per 1K tokens
llm_cost = answer_tokens * 0.00003 / 1000 # $0.00003 per 1K tokens for GPT-4o-mini
rerank_cost = len(contexts) * 0.0001 if use_reranker else 0 # $0.0001 per document
total_cost = embedding_cost + llm_cost + rerank_cost
# Format sources with citation numbers
sources = []
for i, doc in enumerate(docs):
source_info = f"[{i+1}] {doc['metadata'].get('source', 'Unknown')}"
if 'rerank_score' in doc:
source_info += f" (rerank: {doc['rerank_score']:.3f})"
else:
source_info += f" (score: {doc.get('score', 0):.3f})"
sources.append(source_info)
sources_text = "\n".join(sources)
# Add timing and cost info to answer
answer_with_meta = f"{answer}\n\n---\n**Processing Time:** {processing_time:.2f}s\n**Estimated Cost:** ${total_cost:.6f}\n**Tokens:** Query: {query_tokens:.0f}, Context: {context_tokens:.0f}, Answer: {answer_tokens:.0f}"
return answer_with_meta, sources_text
except Exception as e:
return f"Error: {e}", ""
def build_ui() -> gr.Blocks:
with gr.Blocks(title="Mini RAG - Track B Assessment") as demo:
gr.Markdown("""
## Mini RAG - Track B Assessment
**Goal:** Build and host a small RAG app with text input, vector storage, retrieval + reranking, and LLM answering with citations.
### Features:
- **Text Input/Upload:** Paste text or upload files (.txt, .md, .pdf)
- **Vector Storage:** Pinecone cloud-hosted vector database
- **Retrieval + Reranking:** Top-k retrieval with optional Cohere reranker
- **LLM Answering:** OpenAI/Groq with inline citations [1], [2]
- **Metrics:** Request timing and cost estimates
""")
with gr.Tab("Text Input"):
gr.Markdown("### Process Text Input")
text_input = gr.Textbox(label="Paste your text here", lines=10, placeholder="Enter or paste your document text here...")
chunk_size = gr.Slider(400, 1200, value=800, step=100, label="Chunk Size (tokens)")
chunk_overlap = gr.Slider(50, 200, value=120, step=10, label="Chunk Overlap (tokens)")
process_btn = gr.Button("Process & Store Text")
process_out = gr.Textbox(label="Status")
process_btn.click(process_text_input, inputs=[text_input, chunk_size, chunk_overlap], outputs=[process_out])
with gr.Tab("File Ingestion"):
gr.Markdown("### Ingest Files from Directory")
data_dir = gr.Textbox(label="Data directory", value=os.getenv("DATA_DIR", "./data"))
ingest_btn = gr.Button("Run Ingestion")
ingest_out = gr.Textbox(label="Status")
ingest_btn.click(run_ingest, inputs=[data_dir], outputs=[ingest_out])
with gr.Tab("Query"):
gr.Markdown("### Ask Questions")
query = gr.Textbox(label="Question", lines=3, placeholder="Ask a question about your stored documents...")
top_k = gr.Slider(1, 20, value=5, step=1, label="Top K retrieval")
use_reranker = gr.Checkbox(value=True, label="Use reranker (Cohere)")
submit = gr.Button("Ask Question")
answer = gr.Markdown(label="Answer with Citations")
sources = gr.Markdown(label="Sources")
submit.click(answer_query, inputs=[query, top_k, use_reranker], outputs=[answer, sources])
with gr.Tab("Evaluation"):
gr.Markdown("""
### Evaluation Examples (Gold Set)
**Sample Q&A pairs for testing:**
1. **Q:** What is the main topic of the document?
**Expected:** Clear identification of document subject
2. **Q:** What are the key findings or conclusions?
**Expected:** Specific facts or conclusions from the text
3. **Q:** What methodology was used?
**Expected:** Description of approach or methods mentioned
4. **Q:** What are the limitations discussed?
**Expected:** Any limitations or constraints mentioned
5. **Q:** What future work is suggested?
**Expected:** Recommendations or future directions
**Success Metrics:**
- **Precision:** Relevant information in answers
- **Recall:** Coverage of available information
- **Citation Accuracy:** Proper source attribution
""")
return demo
if __name__ == "__main__":
ui = build_ui()
ui.launch()