"""Gradio RAG chatbot.

Downloads a prebuilt FAISS index + chunk store from a Hugging Face repo,
retrieves the top-k chunks for each user question, and answers with
TinyLlama via the transformers text-generation pipeline.
"""

import json
import os
import pickle

import faiss
import gradio as gr
import numpy as np
import requests
from sentence_transformers import SentenceTransformer
from transformers import pipeline

HF_REPO = os.getenv("HF_REPO", "Nadun102/RAG-system")

# -----------------------
# 1. Load LLM
# -----------------------
# max_new_tokens (not max_length) so a long retrieved context cannot eat the
# entire generation budget; do_sample=True so temperature actually takes
# effect (it is silently ignored under greedy decoding).
generator = pipeline(
    "text-generation",
    model="TinyLlama/TinyLlama-1.1B-Chat-v1.0",
    max_new_tokens=300,
    do_sample=True,
    temperature=0.3,
)

# -----------------------
# 2. Load embeddings
# -----------------------
embedder = SentenceTransformer("all-MiniLM-L6-v2")

# -----------------------
# 3. Download files from HF repo
# -----------------------
def hf_download(path):
    """Fetch *path* from the HF repo and return its raw bytes.

    Raises requests.HTTPError on a non-2xx response so we never pickle or
    parse an HTML error page by accident.
    """
    url = f"https://huggingface.co/{HF_REPO}/resolve/main/{path}"
    resp = requests.get(url, timeout=60)
    resp.raise_for_status()
    return resp.content


chunks = json.loads(hf_download("chunks.json"))

with open("metadata.pkl", "wb") as f:
    f.write(hf_download("metadata.pkl"))
# NOTE(security): pickle.load executes arbitrary code on load — acceptable
# only because HF_REPO defaults to a repo we control. Never point HF_REPO
# at an untrusted repository.
with open("metadata.pkl", "rb") as f:
    metadata = pickle.load(f)

with open("faiss_index.bin", "wb") as f:
    f.write(hf_download("faiss_index.bin"))
faiss_index = faiss.read_index("faiss_index.bin")

# -----------------------
# 4. RAG Retrieval
# -----------------------
def retrieve(query, k=3):
    """Return the top-*k* chunks most similar to *query*, newline-joined."""
    # FAISS requires float32; be explicit rather than rely on encoder dtype.
    q_emb = np.asarray(embedder.encode([query]), dtype=np.float32)
    _scores, indices = faiss_index.search(q_emb, k)
    # FAISS pads with -1 when the index holds fewer than k vectors;
    # chunks[-1] would silently return the wrong chunk, so filter them out.
    ctx = [chunks[idx] for idx in indices[0] if idx >= 0]
    return "\n".join(ctx)

# -----------------------
# 5. Generate answer
# -----------------------
def answer_fn(message, history):
    """Gradio chat handler: retrieve context, prompt the LLM, return the answer.

    *history* is supplied by gr.ChatInterface and is intentionally unused.
    """
    context = retrieve(message)
    prompt = f"""
Use the following context to answer the question clearly:

CONTEXT:
{context}

QUESTION:
{message}

ANSWER:
"""
    # return_full_text=False strips the echoed prompt so the chat window
    # shows only the model's answer, not CONTEXT/QUESTION boilerplate.
    output = generator(prompt, return_full_text=False)[0]["generated_text"]
    return output.strip()

# -----------------------
# 6. Gradio Chat UI
# -----------------------
chatbot = gr.ChatInterface(fn=answer_fn)

if __name__ == "__main__":
    chatbot.launch()