"""Gradio front-end for the Mini RAG (Track B) assessment app."""
import os
import tempfile
import time

import gradio as gr
from dotenv import load_dotenv

from ingest import ingest
from rag_core import RAGCore
| load_dotenv() | |
| rag = RAGCore() | |
def run_ingest(data_dir: str) -> str:
    """Ingest documents from *data_dir* into the vector store.

    Falls back to the DATA_DIR env var (default "./data") when the
    textbox is empty. Returns a status string for the Gradio UI.
    """
    target_dir = data_dir or os.getenv("DATA_DIR", "./data")
    try:
        chunk_count = ingest(data_dir=target_dir)
    except Exception as exc:  # surface any ingestion failure as a UI message
        return f"Ingestion failed: {exc}"
    return f"Ingestion complete. Chunks ingested: {chunk_count}"
def process_text_input(text: str, chunk_size: int, chunk_overlap: int) -> str:
    """Chunk and store pasted text in the vector DB.

    Writes the text to a throwaway directory, runs the standard ingestion
    pipeline on it, and cleans up afterwards.

    Args:
        text: Raw document text pasted by the user.
        chunk_size: Target chunk size in tokens, forwarded to ``ingest``.
        chunk_overlap: Overlap between chunks in tokens, forwarded to ``ingest``.

    Returns:
        A human-readable status string for the Gradio UI.
    """
    try:
        if not text.strip():
            return "No text provided"
        # Use a unique temporary directory instead of a fixed "./temp_upload":
        # the context manager guarantees cleanup even when ingest() raises
        # (the old code leaked the file/dir on error), and unique paths avoid
        # collisions between concurrent requests.
        with tempfile.TemporaryDirectory(prefix="temp_upload_") as temp_dir:
            temp_file = os.path.join(temp_dir, "user_input.txt")
            with open(temp_file, "w", encoding="utf-8") as f:
                f.write(text)
            # Run the text through the normal ingestion pipeline.
            count = ingest(data_dir=temp_dir, chunk_size=chunk_size, chunk_overlap=chunk_overlap)
        return f"Text processed and stored: {count} chunks created"
    except Exception as e:
        return f"Text processing failed: {e}"
def answer_query(query: str, top_k: int, use_reranker: bool):
    """Answer *query* from the vector store.

    Retrieves (and optionally reranks) the top-k passages, generates an
    answer with inline citations, and appends timing/cost estimates.

    Returns:
        Tuple of (answer markdown, sources markdown); on failure an
        error string and an empty sources string.
    """
    try:
        started = time.time()

        # Retrieve supporting passages, optionally reranked, then generate
        # the cited answer from those contexts.
        docs, contexts = rag.retrieve(query, top_k=top_k, rerank=use_reranker)
        answer = rag.generate_with_citations(query, contexts)

        elapsed = time.time() - started

        # Very rough token estimates: ~1.3 tokens per whitespace-split word.
        query_tokens = len(query.split()) * 1.3
        context_tokens = sum(len(chunk.split()) * 1.3 for chunk in contexts)
        answer_tokens = len(answer.split()) * 1.3

        # Back-of-envelope cost model based on typical provider pricing.
        embedding_cost = (query_tokens + context_tokens) * 0.0001 / 1000  # $0.0001 per 1K tokens
        llm_cost = answer_tokens * 0.00003 / 1000  # $0.00003 per 1K tokens for GPT-4o-mini
        rerank_cost = len(contexts) * 0.0001 if use_reranker else 0  # $0.0001 per document
        total_cost = embedding_cost + llm_cost + rerank_cost

        # One "[n] source (score)" line per retrieved document; prefer the
        # reranker score when present, otherwise the retrieval score.
        source_lines = []
        for rank, doc in enumerate(docs, start=1):
            line = f"[{rank}] {doc['metadata'].get('source', 'Unknown')}"
            if 'rerank_score' in doc:
                line += f" (rerank: {doc['rerank_score']:.3f})"
            else:
                line += f" (score: {doc.get('score', 0):.3f})"
            source_lines.append(line)

        # Append timing/cost metadata below a horizontal rule.
        answer_with_meta = (
            f"{answer}\n\n---\n**Processing Time:** {elapsed:.2f}s"
            f"\n**Estimated Cost:** ${total_cost:.6f}"
            f"\n**Tokens:** Query: {query_tokens:.0f}, Context: {context_tokens:.0f}, Answer: {answer_tokens:.0f}"
        )
        return answer_with_meta, "\n".join(source_lines)
    except Exception as e:
        return f"Error: {e}", ""
def build_ui() -> gr.Blocks:
    """Assemble and return the Gradio Blocks app.

    Four tabs: paste-text ingestion, directory ingestion, question
    answering, and a static evaluation guide.
    """
    with gr.Blocks(title="Mini RAG - Track B Assessment") as demo:
        # Header / feature summary shown above all tabs.
        gr.Markdown("""
## Mini RAG - Track B Assessment
**Goal:** Build and host a small RAG app with text input, vector storage, retrieval + reranking, and LLM answering with citations.
### Features:
- **Text Input/Upload:** Paste text or upload files (.txt, .md, .pdf)
- **Vector Storage:** Pinecone cloud-hosted vector database
- **Retrieval + Reranking:** Top-k retrieval with optional Cohere reranker
- **LLM Answering:** OpenAI/Groq with inline citations [1], [2]
- **Metrics:** Request timing and cost estimates
""")
        # Tab 1: paste raw text, choose chunking params, store in vector DB.
        with gr.Tab("Text Input"):
            gr.Markdown("### Process Text Input")
            text_input = gr.Textbox(label="Paste your text here", lines=10, placeholder="Enter or paste your document text here...")
            chunk_size = gr.Slider(400, 1200, value=800, step=100, label="Chunk Size (tokens)")
            chunk_overlap = gr.Slider(50, 200, value=120, step=10, label="Chunk Overlap (tokens)")
            process_btn = gr.Button("Process & Store Text")
            process_out = gr.Textbox(label="Status")
            process_btn.click(process_text_input, inputs=[text_input, chunk_size, chunk_overlap], outputs=[process_out])
        # Tab 2: ingest all supported files under a directory path.
        with gr.Tab("File Ingestion"):
            gr.Markdown("### Ingest Files from Directory")
            data_dir = gr.Textbox(label="Data directory", value=os.getenv("DATA_DIR", "./data"))
            ingest_btn = gr.Button("Run Ingestion")
            ingest_out = gr.Textbox(label="Status")
            ingest_btn.click(run_ingest, inputs=[data_dir], outputs=[ingest_out])
        # Tab 3: ask questions against the stored chunks.
        with gr.Tab("Query"):
            gr.Markdown("### Ask Questions")
            query = gr.Textbox(label="Question", lines=3, placeholder="Ask a question about your stored documents...")
            top_k = gr.Slider(1, 20, value=5, step=1, label="Top K retrieval")
            use_reranker = gr.Checkbox(value=True, label="Use reranker (Cohere)")
            submit = gr.Button("Ask Question")
            answer = gr.Markdown(label="Answer with Citations")
            sources = gr.Markdown(label="Sources")
            submit.click(answer_query, inputs=[query, top_k, use_reranker], outputs=[answer, sources])
        # Tab 4: static evaluation guidance; no callbacks attached.
        with gr.Tab("Evaluation"):
            gr.Markdown("""
### Evaluation Examples (Gold Set)
**Sample Q&A pairs for testing:**
1. **Q:** What is the main topic of the document?
**Expected:** Clear identification of document subject
2. **Q:** What are the key findings or conclusions?
**Expected:** Specific facts or conclusions from the text
3. **Q:** What methodology was used?
**Expected:** Description of approach or methods mentioned
4. **Q:** What are the limitations discussed?
**Expected:** Any limitations or constraints mentioned
5. **Q:** What future work is suggested?
**Expected:** Recommendations or future directions
**Success Metrics:**
- **Precision:** Relevant information in answers
- **Recall:** Coverage of available information
- **Citation Accuracy:** Proper source attribution
""")
    return demo
if __name__ == "__main__":
    # Build and launch the Gradio app when run as a script.
    build_ui().launch()