# RAG / app.py
# Hugging Face Space file (uploaded by Kakarot21, commit e857a6a, ~3 kB).
import gradio as gr
from langchain_huggingface import HuggingFaceEmbeddings
from langchain_chroma import Chroma
from transformers import AutoTokenizer, AutoModelForCausalLM, pipeline
import torch
import os
from data_cutter import create_db
# Constants
CHROMA_PATH = "chroma_db"  # on-disk location of the persisted Chroma index
MODEL_ID = "Qwen/Qwen2.5-1.5B-Instruct"  # small instruct model suitable for CPU

print("🚀 Starting app...")

# 1. Initialize/Load Database
print("🔄 Initializing database from data folder...")
# We rebuild the DB on startup to ensure it matches the current data
try:
    vectorstore = create_db()
    print("✅ Database created successfully!")
except Exception as e:
    print(f"❌ Error creating database: {e}")
    # Fallback: try to load if exists, though create_db should have handled it
    if os.path.exists(CHROMA_PATH):
        print("⚠️ Attempting to load existing database...")
        # NOTE(review): the embedding model here must match whatever
        # create_db() used, or similarity search will be meaningless — confirm.
        embeddings = HuggingFaceEmbeddings(model_name="all-MiniLM-L6-v2")
        vectorstore = Chroma(persist_directory=CHROMA_PATH, embedding_function=embeddings)
    else:
        # Bare `raise` re-raises the active exception with its original
        # traceback intact (idiomatic; `raise e` re-sets the traceback frame).
        raise
# 2. Load AI Model
print(f"🤖 Loading AI Model ({MODEL_ID})...")
try:
    tokenizer = AutoTokenizer.from_pretrained(MODEL_ID)
    model = AutoModelForCausalLM.from_pretrained(MODEL_ID)
    # Text-generation pipeline: CPU-only (device=-1), sampling enabled so
    # answers vary; max_new_tokens caps the length of each response.
    pipe = pipeline(
        "text-generation",
        model=model,
        tokenizer=tokenizer,
        max_new_tokens=512,
        device=-1,  # Run on CPU
        do_sample=True,
        temperature=0.7,
        top_p=0.9,
    )
    print("✅ AI Model loaded successfully!")
except Exception as e:
    print(f"❌ Error loading model: {e}")
    # Bare `raise` preserves the original exception and traceback; a startup
    # failure here should abort the app rather than continue in a broken state.
    raise
def chat_function(message, history):
    """Answer a user query grounded in documents from the vector store.

    Retrieves the 3 most similar documents, folds them into a chat prompt,
    generates a completion with the local pipeline, and returns only the
    assistant's portion of the model output (the pipeline echoes the prompt).
    """
    print(f"📨 Received query: {message}")

    # Retrieve supporting passages and join them into a single context blob.
    hits = vectorstore.similarity_search(message, k=3)
    context = "\n\n".join(doc.page_content for doc in hits)

    # Build the chat-format prompt and render it via the model's template.
    conversation = [
        {"role": "system", "content": "You are a helpful assistant. Answer the user's question based ONLY on the provided context. If the answer is not in the context, say you don't know."},
        {"role": "user", "content": f"Context:\n{context}\n\nQuestion: {message}"},
    ]
    prompt = tokenizer.apply_chat_template(conversation, tokenize=False, add_generation_prompt=True)

    # Generate; the pipeline returns the prompt plus the new tokens.
    generated = pipe(prompt)[0]['generated_text']

    # Strip the echoed prompt: prefer the Qwen chat marker, fall back to
    # literal prompt removal, otherwise return the raw output untouched.
    marker = "<|im_start|>assistant"
    if marker in generated:
        return generated.split(marker)[-1].strip()
    if prompt in generated:
        return generated.replace(prompt, "").strip()
    return generated
# Create Gradio Interface
# ChatInterface wires chat_function(message, history) into a ready-made chat UI.
demo = gr.ChatInterface(
fn=chat_function,
title="RAG Chat with Your Data",
description=f"Ask questions about your documents. Powered by {MODEL_ID}.",
examples=["What is the main topic?", "Summarize the content."]
)
# Launch only when executed as a script (Spaces runtime imports/executes this file).
if __name__ == "__main__":
demo.launch()