# mini-rag / app.py
# (Originally uploaded to a Hugging Face Space by navyamehta, commit 33f5651.)
import contextlib
import os
import time

import gradio as gr
from dotenv import load_dotenv

from ingest import ingest
from rag_core import RAGCore
load_dotenv()
rag = RAGCore()
def run_ingest(data_dir: str) -> str:
try:
count = ingest(data_dir=data_dir or os.getenv("DATA_DIR", "./data"))
return f"Ingestion complete. Chunks ingested: {count}"
except Exception as e:
return f"Ingestion failed: {e}"
def process_text_input(text: str, chunk_size: int, chunk_overlap: int) -> str:
"""Process uploaded/pasted text and store in vector DB"""
try:
if not text.strip():
return "No text provided"
# Create temporary file for ingestion
temp_dir = "./temp_upload"
os.makedirs(temp_dir, exist_ok=True)
temp_file = os.path.join(temp_dir, "user_input.txt")
with open(temp_file, "w", encoding="utf-8") as f:
f.write(text)
# Ingest the text
count = ingest(data_dir=temp_dir, chunk_size=chunk_size, chunk_overlap=chunk_overlap)
# Clean up
os.remove(temp_file)
os.rmdir(temp_dir)
return f"Text processed and stored: {count} chunks created"
except Exception as e:
return f"Text processing failed: {e}"
def answer_query(query: str, top_k: int, use_reranker: bool):
try:
start_time = time.time()
# Retrieve and rerank
docs, contexts = rag.retrieve(query, top_k=top_k, rerank=use_reranker)
# Generate answer with inline citations
answer = rag.generate_with_citations(query, contexts)
# Calculate timing and estimates
end_time = time.time()
processing_time = end_time - start_time
# Rough token estimates (very approximate)
query_tokens = len(query.split()) * 1.3 # rough tokenization
context_tokens = sum(len(c.split()) * 1.3 for c in contexts)
answer_tokens = len(answer.split()) * 1.3
# Cost estimates (rough, based on typical pricing)
embedding_cost = (query_tokens + context_tokens) * 0.0001 / 1000 # $0.0001 per 1K tokens
llm_cost = answer_tokens * 0.00003 / 1000 # $0.00003 per 1K tokens for GPT-4o-mini
rerank_cost = len(contexts) * 0.0001 if use_reranker else 0 # $0.0001 per document
total_cost = embedding_cost + llm_cost + rerank_cost
# Format sources with citation numbers
sources = []
for i, doc in enumerate(docs):
source_info = f"[{i+1}] {doc['metadata'].get('source', 'Unknown')}"
if 'rerank_score' in doc:
source_info += f" (rerank: {doc['rerank_score']:.3f})"
else:
source_info += f" (score: {doc.get('score', 0):.3f})"
sources.append(source_info)
sources_text = "\n".join(sources)
# Add timing and cost info to answer
answer_with_meta = f"{answer}\n\n---\n**Processing Time:** {processing_time:.2f}s\n**Estimated Cost:** ${total_cost:.6f}\n**Tokens:** Query: {query_tokens:.0f}, Context: {context_tokens:.0f}, Answer: {answer_tokens:.0f}"
return answer_with_meta, sources_text
except Exception as e:
return f"Error: {e}", ""
def build_ui() -> gr.Blocks:
with gr.Blocks(title="Mini RAG - Track B Assessment") as demo:
gr.Markdown("""
## Mini RAG - Track B Assessment
**Goal:** Build and host a small RAG app with text input, vector storage, retrieval + reranking, and LLM answering with citations.
### Features:
- **Text Input/Upload:** Paste text or upload files (.txt, .md, .pdf)
- **Vector Storage:** Pinecone cloud-hosted vector database
- **Retrieval + Reranking:** Top-k retrieval with optional Cohere reranker
- **LLM Answering:** OpenAI/Groq with inline citations [1], [2]
- **Metrics:** Request timing and cost estimates
""")
with gr.Tab("Text Input"):
gr.Markdown("### Process Text Input")
text_input = gr.Textbox(label="Paste your text here", lines=10, placeholder="Enter or paste your document text here...")
chunk_size = gr.Slider(400, 1200, value=800, step=100, label="Chunk Size (tokens)")
chunk_overlap = gr.Slider(50, 200, value=120, step=10, label="Chunk Overlap (tokens)")
process_btn = gr.Button("Process & Store Text")
process_out = gr.Textbox(label="Status")
process_btn.click(process_text_input, inputs=[text_input, chunk_size, chunk_overlap], outputs=[process_out])
with gr.Tab("File Ingestion"):
gr.Markdown("### Ingest Files from Directory")
data_dir = gr.Textbox(label="Data directory", value=os.getenv("DATA_DIR", "./data"))
ingest_btn = gr.Button("Run Ingestion")
ingest_out = gr.Textbox(label="Status")
ingest_btn.click(run_ingest, inputs=[data_dir], outputs=[ingest_out])
with gr.Tab("Query"):
gr.Markdown("### Ask Questions")
query = gr.Textbox(label="Question", lines=3, placeholder="Ask a question about your stored documents...")
top_k = gr.Slider(1, 20, value=5, step=1, label="Top K retrieval")
use_reranker = gr.Checkbox(value=True, label="Use reranker (Cohere)")
submit = gr.Button("Ask Question")
answer = gr.Markdown(label="Answer with Citations")
sources = gr.Markdown(label="Sources")
submit.click(answer_query, inputs=[query, top_k, use_reranker], outputs=[answer, sources])
with gr.Tab("Evaluation"):
gr.Markdown("""
### Evaluation Examples (Gold Set)
**Sample Q&A pairs for testing:**
1. **Q:** What is the main topic of the document?
**Expected:** Clear identification of document subject
2. **Q:** What are the key findings or conclusions?
**Expected:** Specific facts or conclusions from the text
3. **Q:** What methodology was used?
**Expected:** Description of approach or methods mentioned
4. **Q:** What are the limitations discussed?
**Expected:** Any limitations or constraints mentioned
5. **Q:** What future work is suggested?
**Expected:** Recommendations or future directions
**Success Metrics:**
- **Precision:** Relevant information in answers
- **Recall:** Coverage of available information
- **Citation Accuracy:** Proper source attribution
""")
return demo
if __name__ == "__main__":
ui = build_ui()
ui.launch()