Spaces:
Sleeping
Sleeping
João Lima
committed on
Commit
·
3a93742
1
Parent(s):
7baa19c
Initial Commit
Browse files- README.md +24 -13
- __pycache__/config.cpython-312.pyc +0 -0
- app.py +43 -0
- config.py +3 -0
- evaluation/metrics.py +33 -0
- ingestion/__pycache__/pdf.cpython-312.pyc +0 -0
- ingestion/pdf.py +16 -0
- logs/metrics.jsonl +1 -0
- rag/__pycache__/llm.cpython-312.pyc +0 -0
- rag/__pycache__/pipeline.cpython-312.pyc +0 -0
- rag/__pycache__/retriever.cpython-312.pyc +0 -0
- rag/llm.py +28 -0
- rag/pipeline.py +19 -0
- rag/retriever.py +12 -0
- requirements.txt +12 -0
README.md
CHANGED
|
@@ -1,13 +1,24 @@
|
|
| 1 |
-
|
| 2 |
-
|
| 3 |
-
|
| 4 |
-
|
| 5 |
-
|
| 6 |
-
|
| 7 |
-
|
| 8 |
-
|
| 9 |
-
|
| 10 |
-
|
| 11 |
-
|
| 12 |
-
|
| 13 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
This project demonstrates a **lightweight yet production-realistic RAG architecture**.
|
| 2 |
+
|
| 3 |
+
|
| 4 |
+
### Concepts demonstrated
|
| 5 |
+
- Retrieval-Augmented Generation
|
| 6 |
+
- Model fallback
|
| 7 |
+
- Automatic evaluation (LLM-as-a-Judge)
|
| 8 |
+
- Metrics logging
|
| 9 |
+
|
| 10 |
+
|
| 11 |
+
### Run locally
|
| 12 |
+
```bash
|
| 13 |
+
pip install -r requirements.txt
|
| 14 |
+
python app.py
|
| 15 |
+
```
|
| 16 |
+
|
| 17 |
+
|
| 18 |
+
### Deploy
|
| 19 |
+
- Hugging Face Spaces
|
| 20 |
+
- Docker
|
| 21 |
+
- Cloud GPU environments
|
| 22 |
+
|
| 23 |
+
|
| 24 |
+
---
|
__pycache__/config.cpython-312.pyc
ADDED
|
Binary file (303 Bytes). View file
|
|
|
app.py
ADDED
|
@@ -0,0 +1,43 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import gradio as gr
|
| 2 |
+
from ingestion.pdf import process_pdf
|
| 3 |
+
from rag.pipeline import run_rag
|
| 4 |
+
|
| 5 |
+
# Module-level cache holding the FAISS index of the most recently processed
# PDF; None until the user uploads a document.  Shared by all requests.
vectorstore = None
|
| 6 |
+
|
| 7 |
+
|
| 8 |
+
def load_document(file):
    """Ingest the uploaded PDF and publish its vector index module-wide."""
    global vectorstore
    index = process_pdf(file)
    vectorstore = index
    return "Document processed successfully."
|
| 12 |
+
|
| 13 |
+
|
| 14 |
+
def ask(question):
    """Answer *question* via the RAG pipeline, or prompt for an upload first.

    Returns an (answer, sources, evaluation) triple of strings.
    """
    if vectorstore is not None:
        return run_rag(question, vectorstore)
    return "Upload a document first", "", ""
|
| 18 |
+
|
| 19 |
+
|
| 20 |
+
# Gradio UI: components are declared in the order they should render,
# then wired to the handlers above.
with gr.Blocks(title="Tech Explainer RAG") as demo:
    gr.Markdown("# Tech Explainer — RAG with Automatic Evaluation")

    # Document ingestion controls.
    file = gr.File(file_types=[".pdf"])
    load_btn = gr.Button("Process PDF")
    status = gr.Textbox()  # shows the "processed" confirmation message

    # Question input.
    question = gr.Textbox(label="Question")
    ask_btn = gr.Button("Ask")

    # Outputs: ask() returns a 3-tuple matching these boxes in order.
    answer = gr.Textbox(label="Answer")
    sources = gr.Textbox(label="Sources")
    evaluation = gr.Textbox(label="Evaluation")

    load_btn.click(load_document, file, status)
    ask_btn.click(ask, question, [answer, sources, evaluation])

if __name__ == "__main__":
    demo.launch()
|
config.py
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# Sentence-embedding model used to build the FAISS vector store.
EMBEDDING_MODEL = "sentence-transformers/all-MiniLM-L6-v2"
# Preferred generation model; loaded first by rag.llm.load_model().
PRIMARY_LLM = "mistralai/Mistral-7B-Instruct-v0.2"
# Smaller model used when loading the primary model fails.
FALLBACK_LLM = "microsoft/phi-2"
|
evaluation/metrics.py
ADDED
|
@@ -0,0 +1,33 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import json
|
| 2 |
+
from datetime import datetime
|
| 3 |
+
from rag.llm import generate
|
| 4 |
+
|
| 5 |
+
|
| 6 |
+
LOG_PATH = "logs/metrics.jsonl"


def evaluate_and_log(question, context, answer):
    """Judge *answer* against *context* with the LLM and append a JSONL record.

    Uses the shared ``generate`` LLM as an LLM-as-a-Judge, writes one JSON
    line to ``LOG_PATH`` and returns the judge's free-text evaluation.
    """
    # Local imports keep this fix self-contained; both are stdlib.
    import os
    from datetime import timezone

    prompt = (
        "Evaluate the answer based on faithfulness, clarity, usefulness and completeness. "
        "Provide a short justification.\n"
        f"Context:\n{context}\n"
        f"Answer:\n{answer}"
    )

    evaluation = generate(prompt, max_tokens=200)

    record = {
        # datetime.utcnow() is deprecated (3.12) and produces a naive
        # timestamp; use a timezone-aware UTC datetime instead.
        "timestamp": datetime.now(timezone.utc).isoformat(),
        "question": question,
        "answer": answer,
        "evaluation": evaluation,
    }

    # Create the log directory on a fresh checkout instead of crashing,
    # and pin the encoding so the log is portable across platforms.
    os.makedirs(os.path.dirname(LOG_PATH), exist_ok=True)
    with open(LOG_PATH, "a", encoding="utf-8") as f:
        # ensure_ascii=False keeps non-English text human-readable in the log.
        f.write(json.dumps(record, ensure_ascii=False) + "\n")

    return evaluation
|
ingestion/__pycache__/pdf.cpython-312.pyc
ADDED
|
Binary file (1.25 kB). View file
|
|
|
ingestion/pdf.py
ADDED
|
@@ -0,0 +1,16 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
from pypdf import PdfReader
|
| 2 |
+
from langchain_text_splitters import RecursiveCharacterTextSplitter
|
| 3 |
+
from langchain_core.documents import Document
|
| 4 |
+
from rag.retriever import build_vectorstore
|
| 5 |
+
|
| 6 |
+
|
| 7 |
+
def process_pdf(file):
    """Extract the text of an uploaded PDF, chunk it, and index it in FAISS."""
    reader = PdfReader(file.name)
    # extract_text() may return None for image-only pages; treat those as empty.
    full_text = "".join(page.extract_text() or "" for page in reader.pages)

    splitter = RecursiveCharacterTextSplitter(chunk_size=500, chunk_overlap=100)
    documents = [
        Document(page_content=chunk) for chunk in splitter.split_text(full_text)
    ]
    return build_vectorstore(documents)
|
logs/metrics.jsonl
ADDED
|
@@ -0,0 +1 @@
|
|
|
|
|
|
|
| 1 |
+
|
rag/__pycache__/llm.cpython-312.pyc
ADDED
|
Binary file (1.78 kB). View file
|
|
|
rag/__pycache__/pipeline.cpython-312.pyc
ADDED
|
Binary file (1.05 kB). View file
|
|
|
rag/__pycache__/retriever.cpython-312.pyc
ADDED
|
Binary file (605 Bytes). View file
|
|
|
rag/llm.py
ADDED
|
@@ -0,0 +1,28 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import torch
|
| 2 |
+
from transformers import AutoTokenizer, AutoModelForCausalLM
|
| 3 |
+
from config import PRIMARY_LLM, FALLBACK_LLM
|
| 4 |
+
|
| 5 |
+
|
| 6 |
+
def load_model():
    """Load the primary LLM in 8-bit, falling back to the smaller model.

    Returns a ``(tokenizer, model)`` pair.  Any failure loading the primary
    model (missing GPU, missing bitsandbytes, download error, ...) triggers
    the fallback; the cause is logged instead of being silently discarded.
    """
    import logging  # stdlib; local import keeps this block self-contained

    try:
        tokenizer = AutoTokenizer.from_pretrained(PRIMARY_LLM)
        # NOTE(review): load_in_8bit requires bitsandbytes and is deprecated
        # in newer transformers in favour of BitsAndBytesConfig — confirm the
        # pinned transformers version before changing it.
        model = AutoModelForCausalLM.from_pretrained(
            PRIMARY_LLM, device_map="auto", load_in_8bit=True
        )
    except Exception:
        # Record WHY we fell back — a silent except hides real config errors.
        logging.getLogger(__name__).exception(
            "Failed to load %s; falling back to %s", PRIMARY_LLM, FALLBACK_LLM
        )
        tokenizer = AutoTokenizer.from_pretrained(FALLBACK_LLM, trust_remote_code=True)
        model = AutoModelForCausalLM.from_pretrained(FALLBACK_LLM)

    return tokenizer, model


# Loaded eagerly at import time: importing rag.llm downloads/loads a model.
tokenizer, model = load_model()
|
| 19 |
+
|
| 20 |
+
|
| 21 |
+
def generate(prompt, max_tokens=400):
    """Generate a completion for *prompt* and return only the new text.

    Decoding ``output[0]`` in full would echo the entire prompt back at the
    start of every answer (causal LMs return prompt + continuation), so the
    prompt's tokens are sliced off before decoding.
    """
    inputs = tokenizer(prompt, return_tensors="pt", truncation=True)
    inputs = {k: v.to(model.device) for k, v in inputs.items()}

    with torch.no_grad():
        output = model.generate(**inputs, max_new_tokens=max_tokens)

    # Keep only the tokens generated after the prompt.
    prompt_len = inputs["input_ids"].shape[1]
    return tokenizer.decode(output[0][prompt_len:], skip_special_tokens=True)
|
rag/pipeline.py
ADDED
|
@@ -0,0 +1,19 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
from rag.llm import generate
|
| 2 |
+
from evaluation.metrics import evaluate_and_log
|
| 3 |
+
|
| 4 |
+
def run_rag(question, vectorstore):
    """Retrieve context, generate an answer, judge it, and return all three.

    Returns ``(answer, sources, evaluation)`` — all strings, matching the
    three output textboxes wired up in app.py.
    """
    docs = vectorstore.similarity_search(question, k=3)
    context = "\n".join(d.page_content for d in docs)

    prompt = (
        "Use the context below to answer the question clearly and simply.\n"
        f"Context:\n{context}\n"
        f"Question: {question}"
    )

    answer = generate(prompt)
    evaluation = evaluate_and_log(question, context, answer)

    # Show the actual retrieved excerpts instead of a fixed placeholder so
    # the "Sources" box is genuinely informative.
    sources = "\n\n".join(
        f"[{i}] {d.page_content}" for i, d in enumerate(docs, 1)
    ) or "No sources retrieved"

    return answer, sources, evaluation
|
rag/retriever.py
ADDED
|
@@ -0,0 +1,12 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
from langchain_community.embeddings import HuggingFaceEmbeddings
|
| 2 |
+
from langchain_community.vectorstores import FAISS
|
| 3 |
+
from config import EMBEDDING_MODEL
|
| 4 |
+
|
| 5 |
+
|
| 6 |
+
# Lazily-created shared embedding model.  Instantiating HuggingFaceEmbeddings
# at import time downloads/loads the sentence-transformer model even when the
# retriever is never used; defer it to first use instead.
_embeddings = None


def _get_embeddings():
    """Create the shared embedding model on first use and cache it."""
    global _embeddings
    if _embeddings is None:
        _embeddings = HuggingFaceEmbeddings(model_name=EMBEDDING_MODEL)
    return _embeddings


def build_vectorstore(documents):
    """Build and return a FAISS vector store over *documents*."""
    return FAISS.from_documents(documents, _get_embeddings())
|
requirements.txt
ADDED
|
@@ -0,0 +1,12 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
gradio
|
| 2 |
+
transformers
|
| 3 |
+
sentence-transformers
|
| 4 |
+
torch
|
| 5 |
+
pypdf
|
| 6 |
+
|
| 7 |
+
langchain
|
| 8 |
+
langchain-core
|
| 9 |
+
langchain-community
|
| 10 |
+
langchain-text-splitters
|
| 11 |
+
|
| 12 |
+
faiss-cpu
|