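"""Streamlit RAG chatbot.

Retrieves relevant chunks from a local `knowledge_base/` folder with FAISS and
answers questions via the HuggingFace Inference API (no local model downloads).
"""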
import os

import numpy as np
import streamlit as st
from dotenv import load_dotenv
from huggingface_hub import InferenceClient
from langchain.embeddings.base import Embeddings
from langchain_community.document_loaders import DirectoryLoader, TextLoader
from langchain_community.vectorstores import FAISS
from langchain_text_splitters import RecursiveCharacterTextSplitter

# Load environment variables if a .env file exists
load_dotenv()

st.set_page_config(page_title="RAG Chatbot", layout="wide")


class HuggingFaceAPIEmbeddings(Embeddings):
    """Custom embeddings class using the HuggingFace Hub InferenceClient."""

    def __init__(self, api_key: str, model_name: str):
        self.client = InferenceClient(token=api_key)
        self.model_name = model_name

    def embed_documents(self, texts: list[str]) -> list[list[float]]:
        """Embed a list of documents."""
        embeddings = []
        for text in texts:
            try:
                # feature_extraction returns the raw embedding vector
                result = self.client.feature_extraction(text, model=self.model_name)
                # Convert to a plain list if the client returned a numpy array
                if isinstance(result, np.ndarray):
                    embeddings.append(result.tolist())
                else:
                    embeddings.append(result)
            except Exception as e:
                st.error(f"Embedding error for text: {text[:50]}... | Error: {e}")
                raise
        return embeddings

    def embed_query(self, text: str) -> list[float]:
        """Embed a single query."""
        return self.embed_documents([text])[0]
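
# Note: embed_documents makes one API call per chunk, so indexing a large
# knowledge base can be slow and may hit rate limits. If that becomes a
# problem, wrapping the feature_extraction call in a retry with a short
# backoff (e.g. via the tenacity library) is a straightforward extension;
# the sequential loop above is kept for simplicity.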

st.title("🤖 RAG Chatbot")

# Sidebar
with st.sidebar:
    st.header("Configuration")

    hf_token = st.text_input(
        "HuggingFace Token (free)",
        type="password",
        value=os.getenv("HF_TOKEN", ""),
        help="Get a free token at https://huggingface.co/settings/tokens",
    )

    # Model selection
    embedding_model = st.selectbox(
        "Embedding Model",
        [
            "sentence-transformers/all-MiniLM-L6-v2",
            "BAAI/bge-small-en-v1.5",
            "sentence-transformers/all-mpnet-base-v2",
        ],
        help="Lightweight models that run on HuggingFace's servers",
    )

    llm_model = st.selectbox(
        "LLM Model",
        [
            "HuggingFaceH4/zephyr-7b-beta",
            "google/gemma-2-2b-it",
            "microsoft/Phi-3-mini-4k-instruct",
            "mistralai/Mistral-7B-Instruct-v0.2",
            "meta-llama/Llama-3.2-3B-Instruct",
        ],
        help="Language model for generating answers. Zephyr and Gemma work best on Spaces.",
    )

    chunk_size = st.slider("Chunk Size", 500, 2000, 1000, 100)
    num_results = st.slider("Number of Retrieved Documents", 1, 5, 3)

    st.markdown("### Knowledge Base")
    st.info("Ensure your documents are in the `knowledge_base` folder.")

    if st.button("🔄 Reload Knowledge Base"):
        st.cache_resource.clear()
        st.rerun()

    st.markdown("---")
    st.markdown("### 📋 Setup Instructions")
    st.markdown(
        "1. Go to [HuggingFace](https://huggingface.co/settings/tokens)\n"
        "2. Create a **Fine-grained** token\n"
        "3. ✅ Enable **'Make calls to Inference Providers'**\n"
        "4. Copy and paste the token above"
    )

# Initialize session state for chat history
if "messages" not in st.session_state:
    st.session_state.messages = []


# Load and process the knowledge base. Cached so documents are not re-embedded
# on every rerun; the sidebar's "Reload Knowledge Base" button invalidates this
# via st.cache_resource.clear(). Keeping the parameters un-prefixed means the
# cache also refreshes whenever the token, model, or chunk size changes.
@st.cache_resource(show_spinner="Processing knowledge base...")
def load_and_process_data(hf_token, embedding_model, chunk_size):
    """Load documents and create a vector store using API-based embeddings."""
    if not os.path.exists("knowledge_base"):
        os.makedirs("knowledge_base")
        st.error("Created 'knowledge_base' folder. Please add some .txt files and refresh.")
        st.stop()

    # Load documents
    try:
        loader = DirectoryLoader(
            "knowledge_base",
            glob="**/*.txt",
            loader_cls=TextLoader,
            loader_kwargs={"autodetect_encoding": True},
        )
        documents = loader.load()
    except Exception as e:
        st.error(f"Error loading documents: {e}")
        st.stop()

    if not documents:
        st.error("No documents found in 'knowledge_base'. Please add .txt files.")
        st.stop()

    # Split text into overlapping chunks
    text_splitter = RecursiveCharacterTextSplitter(
        chunk_size=chunk_size,
        chunk_overlap=200,
        separators=["\n\n", "\n", ". ", " ", ""],
    )
    chunks = text_splitter.split_documents(documents)

    # Create embeddings using the custom API-based class
    embeddings = HuggingFaceAPIEmbeddings(
        api_key=hf_token,
        model_name=embedding_model,
    )

    # Test the embeddings first
    try:
        st.info("Testing embedding API connection...")
        test_embedding = embeddings.embed_query("test")
        st.success(f"✅ Embedding API working! Vector size: {len(test_embedding)}")
    except Exception as e:
        st.error(f"❌ Embedding API test failed: {e}")
        st.error(
            "**Please check:**\n"
            "1. Your token has 'Make calls to Inference Providers' enabled\n"
            "2. You're using a 'Fine-grained' or 'Write' token type\n"
            "3. The token is correctly copied (no extra spaces)\n"
            "4. The model is available on HuggingFace"
        )
        st.stop()

    # Create the vector store
    vectorstore = FAISS.from_documents(documents=chunks, embedding=embeddings)
    return vectorstore, len(documents), len(chunks)
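
# Optional extension (a sketch, not wired in): persist the FAISS index so the
# knowledge base survives restarts without re-embedding. "faiss_index" is an
# arbitrary folder name chosen here for illustration:
#
#     vectorstore.save_local("faiss_index")
#     vectorstore = FAISS.load_local(
#         "faiss_index", embeddings, allow_dangerous_deserialization=True
#     )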


def generate_answer(query: str, context: str, token: str, model: str) -> str:
    """Use the HuggingFace Inference API to generate an answer."""
    client = InferenceClient(token=token)

    # Build the system and user messages
    system_message = (
        "You are a helpful AI assistant. Answer questions based ONLY on the "
        "provided context. If the answer is not in the context, say "
        "'I cannot find this information in the provided documents'."
    )
    user_message = f"Context:\n{context}\n\nQuestion: {query}"

    try:
        # Try chat_completion first (works with newer models)
        messages = [
            {"role": "system", "content": system_message},
            {"role": "user", "content": user_message},
        ]
        response = client.chat_completion(
            messages=messages,
            model=model,
            max_tokens=512,
            temperature=0.2,
            top_p=0.9,
        )
        # Extract the response text
        if hasattr(response, "choices") and len(response.choices) > 0:
            answer = response.choices[0].message.content.strip()
            return answer if answer else "⚠️ Model returned empty response"
        return "⚠️ Unexpected response format"
    except Exception as e:
        error_msg = str(e).lower()

        # If chat_completion is not supported, fall back to text_generation
        if "not supported" in error_msg or "task" in error_msg:
            try:
                # Build a model-specific prompt for raw text generation
                if "mistral" in model.lower() or "mixtral" in model.lower():
                    prompt = f"<s>[INST] {system_message}\n\n{user_message} [/INST]"
                elif "llama" in model.lower():
                    prompt = (
                        "<|begin_of_text|><|start_header_id|>system<|end_header_id|>\n\n"
                        f"{system_message}<|eot_id|><|start_header_id|>user<|end_header_id|>\n\n"
                        f"{user_message}<|eot_id|><|start_header_id|>assistant<|end_header_id|>\n\n"
                    )
                elif "gemma" in model.lower():
                    prompt = (
                        f"<start_of_turn>user\n{system_message}\n\n{user_message}"
                        "<end_of_turn>\n<start_of_turn>model\n"
                    )
                else:
                    prompt = f"{system_message}\n\n{user_message}\n\nAnswer:"

                response = client.text_generation(
                    prompt,
                    model=model,
                    max_new_tokens=512,
                    temperature=0.2,
                    top_p=0.9,
                    return_full_text=False,
                )
                return response.strip() if response else "⚠️ Model returned empty response"
            except Exception as fallback_error:
                return f"⚠️ Error with both chat and text generation: {fallback_error}"

        # Map other common errors to actionable messages
        if "503" in error_msg or "loading" in error_msg:
            return "⚠️ Model is currently loading. Please wait 20-30 seconds and try again."
        if "401" in error_msg or "unauthorized" in error_msg:
            return "⚠️ Authentication failed. Please check your HuggingFace token."
        if "403" in error_msg or "forbidden" in error_msg:
            return "⚠️ Access forbidden. Make sure 'Make calls to Inference Providers' is enabled."
        if "timeout" in error_msg:
            return "⚠️ Request timed out. Please try again."
        return f"⚠️ Error: {e}"
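
# Optional extension (a sketch, assuming the chosen model supports streaming
# chat completion): tokens can be rendered incrementally with st.write_stream
# instead of waiting for the full answer. stream_answer is a hypothetical
# helper shown only for illustration:
#
#     def stream_answer(client, messages, model):
#         for chunk in client.chat_completion(
#             messages=messages, model=model, max_tokens=512, stream=True
#         ):
#             delta = chunk.choices[0].delta.content
#             if delta:
#                 yield delta
#
#     # In the chat handler: st.write_stream(stream_answer(client, messages, model))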

# Main application logic
if not hf_token:
    st.warning("⚠️ Please enter your HuggingFace token in the sidebar.")
    st.info(
        "### 🔑 How to Get Your Token:\n\n"
        "1. Visit [HuggingFace Settings](https://huggingface.co/settings/tokens)\n"
        "2. Click **'Create new token'**\n"
        "3. Select the **'Fine-grained'** token type\n"
        "4. ✅ Check **'Make calls to Inference Providers'**\n"
        "5. Create and copy your token\n"
        "6. Paste it in the sidebar ⬅️"
    )
    st.stop()

try:
    # Load the knowledge base
    vector_store, num_docs, num_chunks = load_and_process_data(
        hf_token,
        embedding_model,
        chunk_size,
    )
    retriever = vector_store.as_retriever(search_kwargs={"k": num_results})

    # Show knowledge base stats
    st.success(f"✅ Knowledge base loaded: {num_docs} documents, {num_chunks} chunks")

    # Display chat history
    for message in st.session_state.messages:
        with st.chat_message(message["role"]):
            st.markdown(message["content"])

    # User input
    if user_input := st.chat_input("Ask something about your knowledge base..."):
        st.session_state.messages.append({"role": "user", "content": user_input})
        with st.chat_message("user"):
            st.markdown(user_input)

        with st.chat_message("assistant"):
            with st.spinner("Searching knowledge base..."):
                relevant_docs = retriever.invoke(user_input)

            if relevant_docs:
                context = "\n\n".join(
                    f"Document {i + 1}:\n{doc.page_content}"
                    for i, doc in enumerate(relevant_docs)
                )
                with st.spinner("Generating answer..."):
                    response = generate_answer(user_input, context, hf_token, llm_model)
                st.markdown(response)

                with st.expander("📄 View Source Documents"):
                    for i, doc in enumerate(relevant_docs):
                        source_file = doc.metadata.get("source", "Unknown")
                        st.markdown(f"**Document {i + 1}** (from `{os.path.basename(source_file)}`):")
                        st.text(doc.page_content)
                        st.markdown("---")
            else:
                response = "❌ No relevant documents found."
                st.markdown(response)

        st.session_state.messages.append({"role": "assistant", "content": response})
except Exception as e:
    st.error(f"❌ Error: {e}")
    error_str = str(e).lower()

    if "403" in error_str or "forbidden" in error_str:
        st.error(
            "### 🔑 Token Permission Issue\n\n"
            "This error usually means your token doesn't have the right permissions.\n\n"
            "**Fix:**\n"
            "1. Go to https://huggingface.co/settings/tokens\n"
            "2. **Delete** your old token\n"
            "3. Create a **NEW** token:\n"
            "   - Type: **Fine-grained**\n"
            "   - ✅ Check **'Make calls to Inference Providers'**\n"
            "4. Copy the NEW token\n"
            "5. Paste it in the sidebar and refresh"
        )
    elif "410" in error_str or "gone" in error_str:
        st.error(
            "### ⚠️ API Endpoint Issue\n\n"
            "The API endpoint has changed or the model is no longer available.\n\n"
            "**Try:**\n"
            "1. Select a different embedding model from the sidebar\n"
            "2. Make sure you have the latest version: `pip install --upgrade huggingface_hub`\n"
            "3. Check if the model exists on HuggingFace"
        )

    with st.expander("🐛 Full Error Details"):
        st.exception(e)

# Footer
st.markdown("---")
st.caption("💡 All processing via HuggingFace API - no local model downloads!")
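
# Expected dependencies (an assumption based on the imports above; pin
# versions as needed in requirements.txt for a HuggingFace Space):
#   streamlit, python-dotenv, numpy, huggingface_hub,
#   langchain, langchain-community, langchain-text-splitters, faiss-cpu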