import gradio as gr
from huggingface_hub import InferenceClient
import numpy as np
# =========================
# Simple HF Embedding Retrieval (No Local Models)
# =========================
embedding_client = InferenceClient(model="sentence-transformers/all-MiniLM-L6-v2")
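# NOTE (assumption): for sentence-transformers models, the hosted
# feature-extraction pipeline returns one pooled sentence embedding per
# input string, so no extra pooling is needed client-side.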
def embed_texts(texts):
    """Embed one string or a list of strings via the HF Inference API."""
    if isinstance(texts, str):
        texts = [texts]
    return np.array(embedding_client.feature_extraction(texts))
# =========================
# Load and Prepare Gita Text
# =========================
with open("gita.txt", "r", encoding="utf-8") as f:
raw_text = f.read()
def chunk_text(text, chunk_size=500, overlap=50):
    """Split text into fixed-size character chunks with a small overlap,
    so passages that straddle a chunk boundary are not lost."""
    chunks = []
    start = 0
    while start < len(text):
        end = start + chunk_size
        chunks.append(text[start:end])
        start += chunk_size - overlap
    return chunks
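# Chunk the scripture once at startup and embed all chunks in a single
# batched API call; each query then needs only one more embedding request.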
documents = chunk_text(raw_text)
doc_embeddings = embed_texts(documents)
def retrieve(query, top_k=4):
    """Return the top_k most similar chunks, joined into one context string."""
    query_embedding = embed_texts(query)[0]
    # all-MiniLM-L6-v2 embeddings should be L2-normalized (the model includes
    # a Normalize step), so a plain dot product acts as cosine similarity.
    scores = np.dot(doc_embeddings, query_embedding)
    top_indices = np.argsort(scores)[-top_k:][::-1]
    results = [documents[i] for i in top_indices]
    return "\n\n".join(results)
# =========================
# RAG Chat Function
# =========================
def respond(
    message,
    history: list[dict[str, str]],
    system_message,
    max_tokens,
    temperature,
    top_p,
    hf_token: gr.OAuthToken,
):
"""
For more information on `huggingface_hub` Inference API support, please check the docs: https://huggingface.co/docs/huggingface_hub/v0.22.2/en/guides/inference
"""
#client = InferenceClient(token=hf_token.token, model="openai/gpt-oss-20b")
client = InferenceClient(token=hf_token.token)
    # Retrieve relevant Gita chunks and ground the system prompt in them
    context = retrieve(message)
    augmented_system_message = (
        system_message
        + "\n\nYou are RAGVeda, an expert in Indian philosophy."
        + "\nAnswer ONLY using the Bhagavad Gita context below."
        + "\nIf the answer is not found in the context, say you do not know."
        + "\n\nContext:\n"
        + context
    )
    messages = [{"role": "system", "content": augmented_system_message}]
    messages.extend(history)
    messages.append({"role": "user", "content": message})
    # Stream tokens as they arrive; the loop variable is named `chunk` so it
    # does not shadow the user's `message`.
    response = ""
    for chunk in client.chat_completion(
        messages,
        max_tokens=max_tokens,
        stream=True,
        temperature=temperature,
        top_p=top_p,
    ):
        choices = chunk.choices
        token = ""
        if len(choices) and choices[0].delta.content:
            token = choices[0].delta.content
        response += token
        yield response
"""
For information on how to customize the ChatInterface, peruse the gradio docs: https://www.gradio.app/docs/chatinterface
"""
chatbot = gr.ChatInterface(
    respond,
    type="messages",
    additional_inputs=[
        gr.Textbox(
            value="You are RAGVeda, a calm and wise assistant rooted in the Bhagavad Gita.",
            label="System message",
        ),
        gr.Slider(minimum=1, maximum=2048, value=512, step=1, label="Max new tokens"),
        gr.Slider(minimum=0.1, maximum=4.0, value=0.7, step=0.1, label="Temperature"),
        gr.Slider(
            minimum=0.1,
            maximum=1.0,
            value=0.95,
            step=0.05,
            label="Top-p (nucleus sampling)",
        ),
    ],
)
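# The additional_inputs above are passed to respond() positionally after
# (message, history); the OAuth token is injected separately via the
# gr.OAuthToken type annotation.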
with gr.Blocks() as demo:
    with gr.Sidebar():
        gr.LoginButton()
    chatbot.render()

if __name__ == "__main__":
    demo.launch()