# RAG / app.py — Streamlit RAG demo (Hugging Face Space by jk12p)
import streamlit as st
import torch
import fitz # PyMuPDF
import os
import faiss
import numpy as np
from sentence_transformers import SentenceTransformer
from transformers import AutoTokenizer, AutoModelForCausalLM
# ================= CONFIG =================
st.set_page_config(page_title="RAG with Phi-2", layout="wide")

# Optional Hugging Face token (needed only for gated/private checkpoints).
HF_TOKEN = os.environ.get("HF_TOKEN")
# Prefer GPU when available; everything below falls back to CPU otherwise.
DEVICE = "cuda" if torch.cuda.is_available() else "cpu"

with st.sidebar:
    st.markdown("### πŸ–₯️ System Info")
    st.text(f"Device: {DEVICE}")
# ================= LOAD MODEL =================
@st.cache_resource
def load_llm():
    """Load the Phi-2 tokenizer and model once per Streamlit session.

    Returns:
        tuple: (tokenizer, model) with the model in eval (inference) mode.
    """
    tokenizer = AutoTokenizer.from_pretrained(
        "microsoft/phi-2",
        token=HF_TOKEN
    )
    model = AutoModelForCausalLM.from_pretrained(
        "microsoft/phi-2",
        token=HF_TOKEN,  # fix: token was passed to the tokenizer but not the model
        torch_dtype=torch.float32,  # REQUIRED for CPU
        low_cpu_mem_usage=True
    )
    model.eval()
    return tokenizer, model
@st.cache_resource
def load_embedder():
    """Return the cached MiniLM sentence-transformer used for chunk/query embeddings."""
    embedding_model = SentenceTransformer("all-MiniLM-L6-v2")
    return embedding_model
# Materialize the cached models for this session.
tokenizer, model = load_llm()
embedder = load_embedder()

# ================= UI =================
st.title("πŸ” RAG App using πŸ€– Phi-2")

with st.sidebar:
    st.header("πŸ“ Upload Document")
    uploaded_file = st.file_uploader("Upload PDF or TXT", type=["pdf", "txt"])
# ================= HELPERS =================
def extract_text(file):
    """Return the plain text of an uploaded file.

    Args:
        file: a Streamlit UploadedFile; ``file.type`` is its MIME type.

    Returns:
        str: page texts joined by newlines for PDFs, or the decoded bytes
        for any other (assumed text) upload.
    """
    if file.type == "application/pdf":
        # Fix: close the PyMuPDF document to release native resources
        # (the original leaked one handle per upload/rerun).
        with fitz.open(stream=file.read(), filetype="pdf") as doc:
            return "\n".join(page.get_text() for page in doc)
    # Fix: a strict utf-8 decode crashed on non-UTF-8 text files; replace
    # undecodable bytes instead so the app keeps working.
    return file.read().decode("utf-8", errors="replace")
def split_into_chunks(text, chunk_size=500):
    """Split `text` into consecutive chunks of at most `chunk_size` characters."""
    pieces = []
    offset = 0
    while offset < len(text):
        pieces.append(text[offset:offset + chunk_size])
        offset += chunk_size
    return pieces
def create_faiss_index(chunks):
    """Embed every chunk and build an exact L2 FAISS index over the vectors.

    Returns:
        tuple: (index, embeddings) — the populated IndexFlatL2 and the
        float32 embedding matrix it was built from.
    """
    vectors = np.asarray(
        embedder.encode(chunks, show_progress_bar=True)
    ).astype("float32")
    flat_index = faiss.IndexFlatL2(vectors.shape[1])
    flat_index.add(vectors)
    return flat_index, vectors
def retrieve_chunks(query, chunks, index, k=5):
    """Return up to `k` chunks most similar to `query` (L2 distance).

    Fix: when the index holds fewer than `k` vectors, FAISS pads the result
    with -1 ids; the original then did `chunks[-1]` and silently returned
    the LAST chunk for every missing hit. Clamp `k` and drop -1 ids instead.
    """
    query_embedding = embedder.encode([query]).astype("float32")
    _, indices = index.search(query_embedding, min(k, len(chunks)))
    return [chunks[i] for i in indices[0] if i != -1]
def generate_answer(context, question):
    """Generate an answer with Phi-2, grounded only in `context`.

    Returns:
        str: the text after the final "Answer:" marker, stripped.
    """
    prompt = f"""
Instruction: Answer ONLY using the context below.
If the answer is not present, say "Information not found."
Context:
{context}
Question:
{question}
Answer:
"""
    # Fix: truncate to Phi-2's 2048-token context window — a long retrieved
    # context previously overflowed the model and crashed generation.
    inputs = tokenizer(prompt, return_tensors="pt", truncation=True, max_length=2048)
    # Keep input tensors on the same device as the model (no-op on CPU).
    inputs = {name: tensor.to(model.device) for name, tensor in inputs.items()}
    with torch.no_grad():
        output = model.generate(
            **inputs,
            max_new_tokens=256,
            temperature=0.2,
            do_sample=True,
            top_p=0.9
        )
    # Decode the full sequence and keep only the generated answer portion.
    return tokenizer.decode(output[0], skip_special_tokens=True).split("Answer:")[-1].strip()
# ================= MAIN LOGIC =================
if uploaded_file:
    # Fix: Streamlit reruns this whole script on every chat message, so the
    # original re-extracted, re-chunked, and re-embedded the document on each
    # question. Cache the expensive work in session_state, keyed per file.
    file_key = (uploaded_file.name, uploaded_file.size)
    if st.session_state.get("rag_file_key") != file_key:
        raw_text = extract_text(uploaded_file)
        chunks = split_into_chunks(raw_text)
        faiss_index, _ = create_faiss_index(chunks)
        st.session_state.rag_file_key = file_key
        st.session_state.rag_raw_text = raw_text
        st.session_state.rag_chunks = chunks
        st.session_state.rag_index = faiss_index
    raw_text = st.session_state.rag_raw_text
    chunks = st.session_state.rag_chunks
    index = st.session_state.rag_index

    st.sidebar.success(f"βœ… {len(chunks)} chunks created")
    with st.sidebar.expander("πŸ“„ Extracted Text"):
        st.text_area("Text", raw_text, height=300)

    st.markdown("### πŸ’¬ Chat with your document")

    if "messages" not in st.session_state:
        st.session_state.messages = []

    # Replay the chat history on every rerun.
    for msg in st.session_state.messages:
        with st.chat_message(msg["role"]):
            st.markdown(msg["content"])

    if user_query := st.chat_input("Ask a question"):
        with st.chat_message("user"):
            st.markdown(user_query)
        st.session_state.messages.append(
            {"role": "user", "content": user_query}
        )
        with st.chat_message("assistant"):
            with st.spinner("Thinking..."):
                context = "\n".join(
                    retrieve_chunks(user_query, chunks, index)
                )
                answer = generate_answer(context, user_query)
                st.markdown(answer)
        st.session_state.messages.append(
            {"role": "assistant", "content": answer}
        )
else:
    st.info("πŸ‘ˆ Upload a document to begin chatting")