# RAG-system / app.py
# (Hugging Face Space file header — "Update app.py", commit c471de9, verified)
import gradio as gr
from transformers import pipeline
from sentence_transformers import SentenceTransformer
import faiss
import numpy as np
import json
import pickle
import requests
import os
# Repo that holds the prebuilt RAG artifacts (chunks.json, metadata.pkl,
# faiss_index.bin); override with the HF_REPO environment variable.
HF_REPO = os.getenv("HF_REPO", "Nadun102/RAG-system")

# -----------------------
# 1. Load LLM
# -----------------------
# Small chat model used to generate answers from the retrieved context.
# FIXES vs. original:
#  - max_new_tokens (not max_length): max_length counts prompt tokens too,
#    so a long retrieved context could leave no budget for the answer.
#  - do_sample=True: temperature has no effect under the default greedy
#    decoding; without it the pipeline ignores temperature entirely.
generator = pipeline(
    "text-generation",
    model="TinyLlama/TinyLlama-1.1B-Chat-v1.0",
    max_new_tokens=300,
    do_sample=True,
    temperature=0.3,
)

# -----------------------
# 2. Load embeddings
# -----------------------
# Sentence embedder for queries; must be the same model that was used to
# build the FAISS index, or the similarity search is meaningless.
embedder = SentenceTransformer("all-MiniLM-L6-v2")
# -----------------------
# 3. Download files from HF repo
# -----------------------
def hf_download(path):
    """Download *path* from the HF repo and return its raw bytes.

    Raises requests.HTTPError on a 404/5xx response — the original
    returned the error page's HTML bytes, which then blew up later and
    confusingly inside json.loads / pickle.load / faiss.read_index.
    """
    url = f"https://huggingface.co/{HF_REPO}/resolve/main/{path}"
    # Bound the wait so Space startup cannot hang forever on a stalled
    # connection, and fail loudly on a bad status instead of silently
    # handing back an HTML error page.
    resp = requests.get(url, timeout=60)
    resp.raise_for_status()
    return resp.content
# Chunk texts, row-aligned with the FAISS index vectors.
chunks = json.loads(hf_download("chunks.json"))

# SECURITY NOTE(review): pickle.load executes arbitrary code from the
# downloaded bytes — acceptable only because the artifact comes from our
# own repo (HF_REPO); do not point HF_REPO at an untrusted source.
with open("metadata.pkl", "wb") as f:
    f.write(hf_download("metadata.pkl"))
# Use a with-block so the read handle is closed (the original
# pickle.load(open(...)) leaked the file descriptor).
with open("metadata.pkl", "rb") as f:
    metadata = pickle.load(f)

# FAISS can only read an index from a file path, so stage it on disk.
with open("faiss_index.bin", "wb") as f:
    f.write(hf_download("faiss_index.bin"))
faiss_index = faiss.read_index("faiss_index.bin")
# -----------------------
# 4. RAG Retrieval
# -----------------------
def retrieve(query, top_k=3):
    """Return the *top_k* chunks most similar to *query*, newline-joined.

    top_k generalizes the previously hard-coded 3; the default preserves
    existing behavior for all callers.
    """
    # encode() returns a 2-D float32 array for a list input, which is the
    # shape faiss.Index.search expects.
    q_emb = embedder.encode([query])
    _scores, indices = faiss_index.search(np.asarray(q_emb, dtype=np.float32), top_k)
    # FAISS pads with -1 when the index holds fewer than top_k vectors;
    # the original would have indexed chunks[-1] (the last chunk) instead
    # of skipping the sentinel.
    return "\n".join(chunks[i] for i in indices[0] if i >= 0)
# -----------------------
# 5. Generate answer
# -----------------------
def answer_fn(message, history):
    """Gradio ChatInterface callback: answer *message* from retrieved context.

    *history* is supplied by gr.ChatInterface but intentionally unused —
    each turn is answered independently from the index.
    """
    context = retrieve(message)
    prompt = f"""
Use the following context to answer the question clearly:
CONTEXT:
{context}
QUESTION:
{message}
ANSWER:
"""
    # return_full_text=False is the fix: by default the pipeline echoes
    # the prompt in generated_text, so the chat UI re-displayed the whole
    # CONTEXT/QUESTION block before every answer.
    output = generator(prompt, return_full_text=False)[0]["generated_text"]
    return output.strip()
# -----------------------
# 6. Gradio Chat UI
# -----------------------
# Wire the RAG answering function into Gradio's standard chat widget.
chatbot = gr.ChatInterface(fn=answer_fn)

if __name__ == "__main__":
    # Start the web server only when executed as a script, not on import.
    chatbot.launch()