João Lima committed on
Commit
3a93742
·
1 Parent(s): 7baa19c

Initial Commit

Browse files
README.md CHANGED
@@ -1,13 +1,24 @@
1
- ---
2
- title: Tech Explainer Rag
3
- emoji: 🚀
4
- colorFrom: indigo
5
- colorTo: blue
6
- sdk: gradio
7
- sdk_version: 6.4.0
8
- app_file: app.py
9
- pinned: false
10
- license: mit
11
- ---
12
-
13
- Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference
 
 
 
 
 
 
 
 
 
 
 
 
1
+ This project demonstrates a **lightweight yet production-realistic RAG architecture**.
2
+
3
+
4
+ ### Concepts demonstrated
5
+ - Retrieval-Augmented Generation
6
+ - Model fallback
7
+ - Automatic evaluation (LLM-as-a-Judge)
8
+ - Metrics logging
9
+
10
+
11
+ ### Run locally
12
+ ```bash
13
+ pip install -r requirements.txt
14
+ python app.py
15
+ ```
16
+
17
+
18
+ ### Deploy
19
+ - Hugging Face Spaces
20
+ - Docker
21
+ - Cloud GPU environments
22
+
23
+
24
+ ---
__pycache__/config.cpython-312.pyc ADDED
Binary file (303 Bytes). View file
 
app.py ADDED
@@ -0,0 +1,43 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import gradio as gr
2
+ from ingestion.pdf import process_pdf
3
+ from rag.pipeline import run_rag
4
+
5
+ vectorstore = None
6
+
7
+
8
def load_document(file):
    """Ingest the uploaded PDF and cache its vector store module-wide.

    Returns a status string shown in the UI textbox.
    """
    global vectorstore

    store = process_pdf(file)
    vectorstore = store
    return "Document processed successfully."
12
+
13
+
14
def ask(question):
    """Answer *question* against the currently loaded document.

    Returns an ``(answer, sources, evaluation)`` triple; when no document
    has been processed yet, a placeholder triple is returned instead.
    """
    store = vectorstore
    if store is None:
        return "Upload a document first", "", ""
    return run_rag(question, store)
18
+
19
+
20
# --- Gradio UI (widget creation order defines on-screen layout) -------------
with gr.Blocks(title="Tech Explainer RAG") as demo:
    gr.Markdown("# Tech Explainer — RAG with Automatic Evaluation")

    # Document ingestion controls.
    file = gr.File(file_types=[".pdf"])
    load_btn = gr.Button("Process PDF")
    status = gr.Textbox()

    # Question input.
    question = gr.Textbox(label="Question")
    ask_btn = gr.Button("Ask")

    # Output panels: the generated answer, the retrieved sources, and the
    # LLM-as-a-Judge evaluation text.
    answer = gr.Textbox(label="Answer")
    sources = gr.Textbox(label="Sources")
    evaluation = gr.Textbox(label="Evaluation")

    # Wire buttons to the handler functions defined above.
    load_btn.click(load_document, file, status)
    ask_btn.click(ask, question, [answer, sources, evaluation])


if __name__ == "__main__":
    demo.launch()
config.py ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
# Central model configuration for the RAG pipeline.

# Sentence-transformers model used to embed document chunks for retrieval.
EMBEDDING_MODEL = "sentence-transformers/all-MiniLM-L6-v2"

# Preferred generation model; loaded 8-bit across available devices.
PRIMARY_LLM = "mistralai/Mistral-7B-Instruct-v0.2"

# Smaller model loaded when the primary one fails (e.g. no GPU / no VRAM).
FALLBACK_LLM = "microsoft/phi-2"
evaluation/metrics.py ADDED
@@ -0,0 +1,33 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
import json
import os
from datetime import datetime, timezone

from rag.llm import generate

# JSONL file that accumulates one evaluation record per answered question.
LOG_PATH = "logs/metrics.jsonl"


def evaluate_and_log(question, context, answer):
    """Judge *answer* with the LLM and append the result to the metrics log.

    Uses the generation model itself as an LLM-as-a-Judge: it rates the
    answer for faithfulness, clarity, usefulness and completeness given the
    retrieved context and the original question.

    Returns the raw evaluation text produced by the judge.
    """
    # Bug fix: the question must be in the judge prompt — usefulness and
    # completeness cannot be rated from context + answer alone.
    prompt = (
        "Evaluate the answer based on faithfulness, clarity, usefulness and completeness. "
        "Provide a short justification.\n"
        f"Question:\n{question}\n"
        f"Context:\n{context}\n"
        f"Answer:\n{answer}"
    )

    evaluation = generate(prompt, max_tokens=200)

    record = {
        # Timezone-aware UTC timestamp (datetime.utcnow() is deprecated).
        "timestamp": datetime.now(timezone.utc).isoformat(),
        "question": question,
        "answer": answer,
        "evaluation": evaluation,
    }

    # Create the log directory on first use so a fresh checkout still works.
    os.makedirs(os.path.dirname(LOG_PATH), exist_ok=True)
    with open(LOG_PATH, "a", encoding="utf-8") as f:
        f.write(json.dumps(record, ensure_ascii=False) + "\n")

    return evaluation
ingestion/__pycache__/pdf.cpython-312.pyc ADDED
Binary file (1.25 kB). View file
 
ingestion/pdf.py ADDED
@@ -0,0 +1,16 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from pypdf import PdfReader
2
+ from langchain_text_splitters import RecursiveCharacterTextSplitter
3
+ from langchain_core.documents import Document
4
+ from rag.retriever import build_vectorstore
5
+
6
+
7
def process_pdf(file):
    """Read an uploaded PDF, split it into chunks, and index them.

    *file* is a Gradio file wrapper exposing ``.name`` (the temp path on
    disk). Returns the vector store built over the resulting chunks.
    """
    reader = PdfReader(file.name)

    # Bug fix: join pages with "\n" so the last word of one page is not
    # glued to the first word of the next. Pages with no extractable text
    # (scans, images) yield "" and are skipped harmlessly.
    text = "\n".join(page.extract_text() or "" for page in reader.pages)

    splitter = RecursiveCharacterTextSplitter(chunk_size=500, chunk_overlap=100)
    chunks = splitter.split_text(text)
    documents = [Document(page_content=c) for c in chunks]

    return build_vectorstore(documents)
logs/metrics.jsonl ADDED
@@ -0,0 +1 @@
 
 
1
+
rag/__pycache__/llm.cpython-312.pyc ADDED
Binary file (1.78 kB). View file
 
rag/__pycache__/pipeline.cpython-312.pyc ADDED
Binary file (1.05 kB). View file
 
rag/__pycache__/retriever.cpython-312.pyc ADDED
Binary file (605 Bytes). View file
 
rag/llm.py ADDED
@@ -0,0 +1,28 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
import logging

import torch
from transformers import AutoTokenizer, AutoModelForCausalLM

from config import PRIMARY_LLM, FALLBACK_LLM

logger = logging.getLogger(__name__)


def load_model():
    """Load the primary LLM, falling back to the smaller model on failure.

    The primary model is loaded 8-bit across available devices; if that
    fails for any reason (no GPU, missing bitsandbytes, download error),
    the CPU-friendly fallback model is loaded instead.

    Returns a ``(tokenizer, model)`` pair.
    """
    try:
        tokenizer = AutoTokenizer.from_pretrained(PRIMARY_LLM)
        model = AutoModelForCausalLM.from_pretrained(
            PRIMARY_LLM, device_map="auto", load_in_8bit=True
        )
    except Exception:
        # Bug fix: don't swallow the failure silently — record why we fell
        # back (the broad catch itself is deliberate: any load failure
        # should degrade to the fallback model rather than crash the app).
        logger.exception(
            "Failed to load %s; falling back to %s", PRIMARY_LLM, FALLBACK_LLM
        )
        tokenizer = AutoTokenizer.from_pretrained(FALLBACK_LLM, trust_remote_code=True)
        model = AutoModelForCausalLM.from_pretrained(FALLBACK_LLM)

    return tokenizer, model


# Loaded once at import time and shared by every generate() call.
tokenizer, model = load_model()
19
+
20
+
21
def generate(prompt, max_tokens=400):
    """Generate a completion for *prompt* and return only the new text.

    Runs greedy decoding (model defaults) under ``torch.no_grad`` with at
    most *max_tokens* new tokens.
    """
    inputs = tokenizer(prompt, return_tensors="pt", truncation=True)
    inputs = {k: v.to(model.device) for k, v in inputs.items()}

    with torch.no_grad():
        output = model.generate(**inputs, max_new_tokens=max_tokens)

    # Bug fix: decoder-only models emit prompt + continuation, so decoding
    # output[0] directly would echo the whole prompt back to the caller.
    # Slice off the prompt tokens before decoding.
    prompt_len = inputs["input_ids"].shape[-1]
    return tokenizer.decode(output[0][prompt_len:], skip_special_tokens=True)
rag/pipeline.py ADDED
@@ -0,0 +1,19 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from rag.llm import generate
2
+ from evaluation.metrics import evaluate_and_log
3
+
4
def run_rag(question, vectorstore):
    """Answer *question* using retrieval-augmented generation.

    Retrieves the top-3 most similar chunks from *vectorstore*, prompts the
    LLM with them, then scores the answer with the LLM-as-a-Judge evaluator.

    Returns ``(answer, sources, evaluation)`` — all strings for the UI.
    """
    docs = vectorstore.similarity_search(question, k=3)
    context = "\n".join(d.page_content for d in docs)

    prompt = (
        "Use the context below to answer the question clearly and simply.\n"
        f"Context:\n{context}\n"
        f"Question: {question}"
    )

    answer = generate(prompt)
    evaluation = evaluate_and_log(question, context, answer)

    # Bug fix: surface the actual retrieved chunks instead of a hard-coded
    # placeholder string, truncated so the Sources textbox stays readable.
    sources = "\n---\n".join(
        f"[{i}] {d.page_content[:300]}" for i, d in enumerate(docs, 1)
    )
    return answer, sources, evaluation
rag/retriever.py ADDED
@@ -0,0 +1,12 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from langchain_community.embeddings import HuggingFaceEmbeddings
2
+ from langchain_community.vectorstores import FAISS
3
+ from config import EMBEDDING_MODEL
4
+
5
+
6
+ _embeddings = HuggingFaceEmbeddings(model_name=EMBEDDING_MODEL)
7
+
8
+
9
+
10
+
11
def build_vectorstore(documents):
    """Index *documents* into an in-memory FAISS store with the shared embedder."""
    store = FAISS.from_documents(documents, _embeddings)
    return store
requirements.txt ADDED
@@ -0,0 +1,12 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ gradio
2
+ transformers
3
+ sentence-transformers
4
+ torch
5
+ pypdf
6
+
7
+ langchain
8
+ langchain-core
9
+ langchain-community
10
+ langchain-text-splitters
11
+
12
+ faiss-cpu