Spaces:
Sleeping
Sleeping
João Lima
committed on
Commit
·
3a93742
1
Parent(s):
7baa19c
Initial Commit
Browse files- README.md +24 -13
- __pycache__/config.cpython-312.pyc +0 -0
- app.py +43 -0
- config.py +3 -0
- evaluation/metrics.py +33 -0
- ingestion/__pycache__/pdf.cpython-312.pyc +0 -0
- ingestion/pdf.py +16 -0
- logs/metrics.jsonl +1 -0
- rag/__pycache__/llm.cpython-312.pyc +0 -0
- rag/__pycache__/pipeline.cpython-312.pyc +0 -0
- rag/__pycache__/retriever.cpython-312.pyc +0 -0
- rag/llm.py +28 -0
- rag/pipeline.py +19 -0
- rag/retriever.py +12 -0
- requirements.txt +12 -0
README.md
CHANGED
|
@@ -1,13 +1,24 @@
|
|
| 1 |
-
|
| 2 |
-
|
| 3 |
-
|
| 4 |
-
|
| 5 |
-
|
| 6 |
-
|
| 7 |
-
|
| 8 |
-
|
| 9 |
-
|
| 10 |
-
|
| 11 |
-
|
| 12 |
-
|
| 13 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
This project demonstrates a **lightweight yet production-realistic RAG architecture**.
|
| 2 |
+
|
| 3 |
+
|
| 4 |
+
### Concepts demonstrated
|
| 5 |
+
- Retrieval-Augmented Generation
|
| 6 |
+
- Model fallback
|
| 7 |
+
- Automatic evaluation (LLM-as-a-Judge)
|
| 8 |
+
- Metrics logging
|
| 9 |
+
|
| 10 |
+
|
| 11 |
+
### Run locally
|
| 12 |
+
```bash
|
| 13 |
+
pip install -r requirements.txt
|
| 14 |
+
python app.py
|
| 15 |
+
```
|
| 16 |
+
|
| 17 |
+
|
| 18 |
+
### Deploy
|
| 19 |
+
- Hugging Face Spaces
|
| 20 |
+
- Docker
|
| 21 |
+
- Cloud GPU environments
|
| 22 |
+
|
| 23 |
+
|
| 24 |
+
---
|
__pycache__/config.cpython-312.pyc
ADDED
|
Binary file (303 Bytes). View file
|
|
|
app.py
ADDED
|
@@ -0,0 +1,43 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import gradio as gr
|
| 2 |
+
from ingestion.pdf import process_pdf
|
| 3 |
+
from rag.pipeline import run_rag
|
| 4 |
+
|
| 5 |
+
# Module-level cache holding the FAISS index of the most recently processed
# PDF; None until the user uploads a document.  Shared by all requests.
vectorstore = None
|
| 6 |
+
|
| 7 |
+
|
| 8 |
+
def load_document(file):
    """Ingest the uploaded PDF and publish its vector index module-wide."""
    global vectorstore
    index = process_pdf(file)
    vectorstore = index
    return "Document processed successfully."
|
| 12 |
+
|
| 13 |
+
|
| 14 |
+
def ask(question):
    """Answer *question* via the RAG pipeline, or prompt for an upload first.

    Returns an (answer, sources, evaluation) triple of strings.
    """
    if vectorstore is not None:
        return run_rag(question, vectorstore)
    return "Upload a document first", "", ""
|
| 18 |
+
|
| 19 |
+
|
| 20 |
+
# Gradio UI: components are declared in the order they should render,
# then wired to the handlers above.
with gr.Blocks(title="Tech Explainer RAG") as demo:
    gr.Markdown("# Tech Explainer — RAG with Automatic Evaluation")

    # Document ingestion controls.
    file = gr.File(file_types=[".pdf"])
    load_btn = gr.Button("Process PDF")
    status = gr.Textbox()  # shows the "processed" confirmation message

    # Question input.
    question = gr.Textbox(label="Question")
    ask_btn = gr.Button("Ask")

    # Outputs: ask() returns a 3-tuple matching these boxes in order.
    answer = gr.Textbox(label="Answer")
    sources = gr.Textbox(label="Sources")
    evaluation = gr.Textbox(label="Evaluation")

    load_btn.click(load_document, file, status)
    ask_btn.click(ask, question, [answer, sources, evaluation])

if __name__ == "__main__":
    demo.launch()
|
config.py
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# Sentence-embedding model used to build the FAISS vector store.
EMBEDDING_MODEL = "sentence-transformers/all-MiniLM-L6-v2"
# Preferred generation model; loaded first by rag.llm.load_model().
PRIMARY_LLM = "mistralai/Mistral-7B-Instruct-v0.2"
# Smaller model used when loading the primary model fails.
FALLBACK_LLM = "microsoft/phi-2"
|
evaluation/metrics.py
ADDED
|
@@ -0,0 +1,33 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import json
|
| 2 |
+
from datetime import datetime
|
| 3 |
+
from rag.llm import generate
|
| 4 |
+
|
| 5 |
+
|
| 6 |
+
LOG_PATH = "logs/metrics.jsonl"


def evaluate_and_log(question, context, answer):
    """Judge *answer* against *context* with the LLM and append a JSONL record.

    Uses the shared ``generate`` LLM as an LLM-as-a-Judge, writes one JSON
    line to ``LOG_PATH`` and returns the judge's free-text evaluation.
    """
    # Local imports keep this fix self-contained; both are stdlib.
    import os
    from datetime import timezone

    prompt = (
        "Evaluate the answer based on faithfulness, clarity, usefulness and completeness. "
        "Provide a short justification.\n"
        f"Context:\n{context}\n"
        f"Answer:\n{answer}"
    )

    evaluation = generate(prompt, max_tokens=200)

    record = {
        # datetime.utcnow() is deprecated (3.12) and produces a naive
        # timestamp; use a timezone-aware UTC datetime instead.
        "timestamp": datetime.now(timezone.utc).isoformat(),
        "question": question,
        "answer": answer,
        "evaluation": evaluation,
    }

    # Create the log directory on a fresh checkout instead of crashing,
    # and pin the encoding so the log is portable across platforms.
    os.makedirs(os.path.dirname(LOG_PATH), exist_ok=True)
    with open(LOG_PATH, "a", encoding="utf-8") as f:
        # ensure_ascii=False keeps non-English text human-readable in the log.
        f.write(json.dumps(record, ensure_ascii=False) + "\n")

    return evaluation
|
ingestion/__pycache__/pdf.cpython-312.pyc
ADDED
|
Binary file (1.25 kB). View file
|
|
|
ingestion/pdf.py
ADDED
|
@@ -0,0 +1,16 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
from pypdf import PdfReader
|
| 2 |
+
from langchain_text_splitters import RecursiveCharacterTextSplitter
|
| 3 |
+
from langchain_core.documents import Document
|
| 4 |
+
from rag.retriever import build_vectorstore
|
| 5 |
+
|
| 6 |
+
|
| 7 |
+
def process_pdf(file):
    """Extract the text of an uploaded PDF, chunk it, and index it in FAISS."""
    reader = PdfReader(file.name)
    # extract_text() may return None for image-only pages; treat those as empty.
    full_text = "".join(page.extract_text() or "" for page in reader.pages)

    splitter = RecursiveCharacterTextSplitter(chunk_size=500, chunk_overlap=100)
    documents = [
        Document(page_content=chunk) for chunk in splitter.split_text(full_text)
    ]
    return build_vectorstore(documents)
|
logs/metrics.jsonl
ADDED
|
@@ -0,0 +1 @@
|
|
|
|
|
|
|
| 1 |
+
|
rag/__pycache__/llm.cpython-312.pyc
ADDED
|
Binary file (1.78 kB). View file
|
|
|
rag/__pycache__/pipeline.cpython-312.pyc
ADDED
|
Binary file (1.05 kB). View file
|
|
|
rag/__pycache__/retriever.cpython-312.pyc
ADDED
|
Binary file (605 Bytes). View file
|
|
|
rag/llm.py
ADDED
|
@@ -0,0 +1,28 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import torch
|
| 2 |
+
from transformers import AutoTokenizer, AutoModelForCausalLM
|
| 3 |
+
from config import PRIMARY_LLM, FALLBACK_LLM
|
| 4 |
+
|
| 5 |
+
|
| 6 |
+
def load_model():
    """Load the primary LLM in 8-bit, falling back to the smaller model.

    Returns a ``(tokenizer, model)`` pair.  Any failure loading the primary
    model (missing GPU, missing bitsandbytes, download error, ...) triggers
    the fallback; the cause is logged instead of being silently discarded.
    """
    import logging  # stdlib; local import keeps this block self-contained

    try:
        tokenizer = AutoTokenizer.from_pretrained(PRIMARY_LLM)
        # NOTE(review): load_in_8bit requires bitsandbytes and is deprecated
        # in newer transformers in favour of BitsAndBytesConfig — confirm the
        # pinned transformers version before changing it.
        model = AutoModelForCausalLM.from_pretrained(
            PRIMARY_LLM, device_map="auto", load_in_8bit=True
        )
    except Exception:
        # Record WHY we fell back — a silent except hides real config errors.
        logging.getLogger(__name__).exception(
            "Failed to load %s; falling back to %s", PRIMARY_LLM, FALLBACK_LLM
        )
        tokenizer = AutoTokenizer.from_pretrained(FALLBACK_LLM, trust_remote_code=True)
        model = AutoModelForCausalLM.from_pretrained(FALLBACK_LLM)

    return tokenizer, model


# Loaded eagerly at import time: importing rag.llm downloads/loads a model.
tokenizer, model = load_model()
|
| 19 |
+
|
| 20 |
+
|
| 21 |
+
def generate(prompt, max_tokens=400):
    """Generate a completion for *prompt* and return only the new text.

    Decoding ``output[0]`` in full would echo the entire prompt back at the
    start of every answer (causal LMs return prompt + continuation), so the
    prompt's tokens are sliced off before decoding.
    """
    inputs = tokenizer(prompt, return_tensors="pt", truncation=True)
    inputs = {k: v.to(model.device) for k, v in inputs.items()}

    with torch.no_grad():
        output = model.generate(**inputs, max_new_tokens=max_tokens)

    # Keep only the tokens generated after the prompt.
    prompt_len = inputs["input_ids"].shape[1]
    return tokenizer.decode(output[0][prompt_len:], skip_special_tokens=True)
|
rag/pipeline.py
ADDED
|
@@ -0,0 +1,19 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
from rag.llm import generate
|
| 2 |
+
from evaluation.metrics import evaluate_and_log
|
| 3 |
+
|
| 4 |
+
def run_rag(question, vectorstore):
    """Retrieve context, generate an answer, judge it, and return all three.

    Returns ``(answer, sources, evaluation)`` — all strings, matching the
    three output textboxes wired up in app.py.
    """
    docs = vectorstore.similarity_search(question, k=3)
    context = "\n".join(d.page_content for d in docs)

    prompt = (
        "Use the context below to answer the question clearly and simply.\n"
        f"Context:\n{context}\n"
        f"Question: {question}"
    )

    answer = generate(prompt)
    evaluation = evaluate_and_log(question, context, answer)

    # Show the actual retrieved excerpts instead of a fixed placeholder so
    # the "Sources" box is genuinely informative.
    sources = "\n\n".join(
        f"[{i}] {d.page_content}" for i, d in enumerate(docs, 1)
    ) or "No sources retrieved"

    return answer, sources, evaluation
|
rag/retriever.py
ADDED
|
@@ -0,0 +1,12 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
from langchain_community.embeddings import HuggingFaceEmbeddings
|
| 2 |
+
from langchain_community.vectorstores import FAISS
|
| 3 |
+
from config import EMBEDDING_MODEL
|
| 4 |
+
|
| 5 |
+
|
| 6 |
+
# Lazily-created shared embedding model.  Instantiating HuggingFaceEmbeddings
# at import time downloads/loads the sentence-transformer model even when the
# retriever is never used; defer it to first use instead.
_embeddings = None


def _get_embeddings():
    """Create the shared embedding model on first use and cache it."""
    global _embeddings
    if _embeddings is None:
        _embeddings = HuggingFaceEmbeddings(model_name=EMBEDDING_MODEL)
    return _embeddings


def build_vectorstore(documents):
    """Build and return a FAISS vector store over *documents*."""
    return FAISS.from_documents(documents, _get_embeddings())
|
requirements.txt
ADDED
|
@@ -0,0 +1,12 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
gradio
|
| 2 |
+
transformers
|
| 3 |
+
sentence-transformers
|
| 4 |
+
torch
|
| 5 |
+
pypdf
|
| 6 |
+
|
| 7 |
+
langchain
|
| 8 |
+
langchain-core
|
| 9 |
+
langchain-community
|
| 10 |
+
langchain-text-splitters
|
| 11 |
+
|
| 12 |
+
faiss-cpu
|