# organic-chatbot / app.py
# Last change: daniel-simeone — "update references" (commit 7e002df)
"""
Gradio app for Hugging Face chatbot with RAG capabilities.

Wires a Gradio Blocks UI to a RAG-enabled chatbot backed by the Hugging Face
Inference API and a local vector store (see ``ingestion.DocumentIngestion``).
"""
import warnings

# Suppress deprecation from dependencies (e.g. accelerate) until they use
# torch.distributed.ReduceOp. Placed before the heavy imports below so the
# filter is already active when those modules load.
warnings.filterwarnings(
    "ignore",
    message=".*torch.distributed.reduce_op.*ReduceOp.*",
    category=FutureWarning,
)
import os
from typing import List, Optional, Tuple

import gradio as gr
from gradio.themes.base import Base
from gradio.themes.utils import colors, fonts, sizes
from huggingface_hub import InferenceClient

from ingestion import DocumentIngestion
# Create a clean minimalist theme
class MinimalistTheme(Base):
    """A clean, minimalist theme with subtle colors and simple styling."""

    def __init__(self):
        # Named font stacks keep the constructor call compact.
        body_fonts = (
            fonts.GoogleFont("Inter"),
            "ui-sans-serif",
            "system-ui",
            "sans-serif",
        )
        mono_fonts = (
            fonts.GoogleFont("JetBrains Mono"),
            "ui-monospace",
            "monospace",
        )
        super().__init__(
            primary_hue=colors.blue,
            secondary_hue=colors.gray,
            neutral_hue=colors.gray,
            spacing_size=sizes.spacing_md,
            radius_size=sizes.radius_sm,
            text_size=sizes.text_md,
            font=body_fonts,
            font_mono=mono_fonts,
        )

        # Theme overrides grouped by concern; applied in a single set() call.
        backgrounds = {
            # Clean backgrounds
            "body_background_fill": "#ffffff",
            "body_background_fill_dark": "#0f0f0f",
            "block_background_fill": "#ffffff",
            "block_background_fill_dark": "#1a1a1a",
        }
        borders = {
            # Subtle borders
            "block_border_width": "1px",
            "block_border_color": "#e0e0e0",
            "block_border_color_dark": "#2a2a2a",
            "block_shadow": "none",
        }
        buttons = {
            # Clean buttons
            "button_primary_background_fill": "#2563eb",
            "button_primary_background_fill_hover": "#1d4ed8",
            "button_primary_text_color": "#ffffff",
            "button_primary_background_fill_dark": "#3b82f6",
            "button_primary_background_fill_hover_dark": "#2563eb",
            "button_secondary_background_fill": "#f3f4f6",
            "button_secondary_background_fill_hover": "#e5e7eb",
            "button_secondary_text_color": "#111827",
            "button_secondary_background_fill_dark": "#374151",
            "button_secondary_background_fill_hover_dark": "#4b5563",
            "button_border_width": "1px",
        }
        inputs = {
            # Input fields
            "input_background_fill": "#ffffff",
            "input_background_fill_dark": "#1a1a1a",
            "input_border_width": "1px",
            "input_border_color": "#d1d5db",
            "input_border_color_dark": "#374151",
        }
        text_colors = {
            # Text colors
            "body_text_color": "#111827",
            "body_text_color_dark": "#e5e7eb",
            "block_label_text_color": "#374151",
            "block_label_text_color_dark": "#9ca3af",
        }
        super().set(**backgrounds, **borders, **buttons, **inputs, **text_colors)
class RAGChatbot:
    """Chatbot that answers questions with retrieval-augmented generation (RAG).

    Retrieval is backed by the FAISS-based ``DocumentIngestion`` vector store;
    generation goes through the Hugging Face Inference API, trying a list of
    chat models in order until one is accepted by the configured providers.
    """

    # Default and fallback models (tried in order until one is supported by
    # your Inference API providers).
    DEFAULT_CHAT_MODEL = "Qwen/Qwen2.5-Coder-7B-Instruct"
    FALLBACK_CHAT_MODELS = [
        "ServiceNow-AI/Apriel-1.6-15b-Thinker:together",
        "microsoft/phi-2",
        "HuggingFaceH4/zephyr-7b-beta",
    ]

    def __init__(
        self,
        model_name: Optional[str] = None,
        embedding_model: str = "all-mpnet-base-v2",
        vector_store_path: str = "data/vector_store",
    ):
        """
        Initialize the RAG chatbot.

        Args:
            model_name: Hugging Face model name for the chatbot (via Inference
                API). Falls back to ``DEFAULT_CHAT_MODEL`` when None/empty.
            embedding_model: Model used for document embeddings.
            vector_store_path: Directory containing a previously saved vector
                store (expects an ``index.faiss`` file inside).
        """
        self.model_name = model_name if model_name else self.DEFAULT_CHAT_MODEL
        # Ordered list of models to try: primary first, then any fallbacks
        # that are not already the primary.
        self._models_to_try = [self.model_name] + [
            m for m in self.FALLBACK_CHAT_MODELS if m != self.model_name
        ]

        # Initialize the Inference API client without a fixed model so the
        # same client can be reused while trying fallback models per request.
        hf_token = os.environ.get("HF_TOKEN")
        # Debug: report HF_TOKEN status, masked so the secret never hits logs.
        if not hf_token:
            print("[DEBUG] HF_TOKEN: not set (empty or missing)")
            print("Warning: HF_TOKEN not set. Inference API calls may fail.")
            print("Set HF_TOKEN environment variable or add it to Space secrets.")
        else:
            masked = f"{hf_token[:4]}...{hf_token[-4:]}" if len(hf_token) > 8 else "****"
            print(f"[DEBUG] HF_TOKEN: set (length={len(hf_token)}, masked={masked})")
            print("HF_TOKEN found. Inference API ready.")
        print(f"[DEBUG] Inference API client (models to try: {self._models_to_try})")
        try:
            self.inference_client = InferenceClient(token=hf_token)
            print("[DEBUG] Inference API client initialized (model chosen per request with fallbacks)")
        except Exception as e:
            # Keep the app alive; generate_response reports the failure to the user.
            print(f"[DEBUG] Error initializing Inference API client: {type(e).__name__}: {e}")
            self.inference_client = None

        # Document ingestion handles embeddings and vector search.
        self.ingestion = DocumentIngestion(embedding_model=embedding_model)

        # Load an existing vector store if one has been built previously.
        if os.path.exists(vector_store_path) and os.path.exists(
            os.path.join(vector_store_path, "index.faiss")
        ):
            try:
                self.ingestion.load(vector_store_path)
                print("Loaded existing vector store")
            except Exception as e:
                print(f"Could not load vector store: {e}")

        # NOTE(review): appears unused within this class; kept for backward
        # compatibility with any external callers.
        self.chat_history = []

    def _generate_with_chat(self, user_content: str, max_new_tokens: int = 512) -> str:
        """Call the Inference API via ``chat_completion``, with model fallbacks.

        Args:
            user_content: Full prompt, sent as a single user message.
            max_new_tokens: Token budget for the completion.

        Returns:
            The stripped assistant message, or "" when every model returned an
            empty/unexpected response without raising.

        Raises:
            Exception: the last availability error if no model was supported,
                or the first non-availability error encountered.
        """
        last_error = None
        for model in self._models_to_try:
            print(f"[DEBUG] _generate_with_chat: trying model={model}, prompt_len={len(user_content)}, max_tokens={max_new_tokens}")
            try:
                response = self.inference_client.chat_completion(
                    model=model,
                    messages=[{"role": "user", "content": user_content}],
                    max_tokens=max_new_tokens,
                    temperature=0.7,
                )
                print(f"[DEBUG] chat_completion OK for model={model}, response type: {type(response).__name__}")
                if response and response.choices and len(response.choices) > 0:
                    msg = response.choices[0].message
                    if hasattr(msg, "content") and msg.content:
                        # Promote the working model to the front for next time.
                        self.model_name = model
                        self._models_to_try = [model] + [m for m in self._models_to_try if m != model]
                        return msg.content.strip()
                # Fall through to the next model on an empty/odd response.
                print("[DEBUG] chat_completion returned empty or unexpected structure")
            except Exception as e:
                last_error = e
                err_str = str(e).lower()
                # Availability errors -> try the next fallback model.
                if "model_not_supported" in err_str or "not supported by any provider" in err_str:
                    print(f"[DEBUG] Model {model} not available, trying next fallback.")
                    continue
                # Anything else (auth, network, bad request) cannot be fixed by
                # switching models, so surface it immediately.
                print(f"[DEBUG] _generate_with_chat exception for {model}: {type(e).__name__}: {e}")
                import traceback
                traceback.print_exc()
                raise
        if last_error is not None:
            raise last_error
        return ""

    def generate_response(self, query: str, use_rag: bool = True, num_results: int = 5) -> str:
        """
        Generate a response to the user query using RAG and the Inference API.

        Args:
            query: User's question.
            use_rag: Whether to retrieve relevant document chunks as context.
            num_results: Number of document chunks to retrieve.

        Returns:
            Generated response text (user-facing error text on failure).
        """
        if self.inference_client is None:
            return "Error: Inference API client not initialized. Please check HF_TOKEN configuration."

        # RAG path: retrieve context chunks and ask the model to answer from them.
        if use_rag and self.ingestion.index is not None:
            try:
                results = self.ingestion.search(query, k=num_results)
                if not results:
                    return "I couldn't find any relevant information in the documents to answer your question. Please try rephrasing or check if the documents contain information about this topic."

                # Build the context block and the index -> source-label mapping
                # in ONE pass, so the References section only lists chunks that
                # actually made it into the prompt. (Previously a second loop
                # over ALL results also recorded chunks whose empty text had
                # been skipped, producing dangling references.)
                context_parts = []
                context_index_to_source = {}
                for i, result in enumerate(results, 1):
                    text = result['text'].strip()
                    if not text:
                        continue  # skip empty chunks entirely
                    meta = result.get('metadata') or {}
                    source_label = meta.get('document_title') or meta.get('source') or f"Source {i}"
                    context_index_to_source[i] = source_label
                    context_parts.append(f"[Context {i}] (Source: {source_label})\n{text}")

                if not context_parts:
                    # Every retrieved chunk was empty: treat as "nothing found"
                    # instead of prompting the model with an empty context.
                    return "I couldn't find any relevant information in the documents to answer your question. Please try rephrasing or check if the documents contain information about this topic."

                context = "\n\n".join(context_parts)

                # Instruction-tuned prompt: answer strictly from the context
                # and cite sources by their labels.
                prompt = f"""
*You are an expert assistant specializing in organic farming, in particular in Canada and its legal context.
Answer the user's question using only the information provided in the context.
If the context does not include the information needed to answer the question, clearly say:
"The provided context does not contain enough information to answer this question."
When answering:
Respond in English only.
Do not use outside knowledge, assumptions, or guesswork.
Cite or reference the specific parts of the context your answer is based on.
Provide concise, accurate, and helpful explanations.
Do not reveal your internal reasoning. Provide only the final answer.
Structure your answer in the following format:
Summary — A brief, high‑level answer.
Supporting Details — Explain using information only from the provided context. When citing, use the Source label shown for that context (e.g. the document title or name in parentheses after [Context N]).
Context References — List each reference with the exact Source shown for that context (e.g. "CAN/CGSB-32.312-2018" or the document title). Include section name or page when that information appears in the context text. Format: document/source, section or location if available, and a short quote or paraphrase. Do not use only "Context 1" or "Context 5" as the reference; always include the document title/source.
Context:
{context}
Question: {query}
Answer:"""

                # Generate the answer using the chat/conversational API.
                try:
                    response_text = self._generate_with_chat(prompt, max_new_tokens=512)
                    if response_text:
                        # Resolve [Context N] markers in the body to their source labels.
                        for i, source_label in context_index_to_source.items():
                            response_text = response_text.replace(
                                f"[Context {i}]",
                                f"({source_label})",
                            )
                        # Append a References section so users see what each source is.
                        ref_lines = [
                            "",
                            "---",
                            "**References**",
                        ]
                        for i, source_label in context_index_to_source.items():
                            ref_lines.append(f"{i}. {source_label}")
                        response_text = response_text.rstrip() + "\n\n" + "\n".join(ref_lines)
                        return response_text
                    raise ValueError("Empty response from model")
                except Exception as api_error:
                    print(f"[DEBUG] RAG generation failed: {type(api_error).__name__}: {api_error}")
                    err_str = str(api_error).lower()
                    if "model_not_supported" in err_str or "not supported by any provider" in err_str:
                        return (
                            "None of the configured chat models are available with your Inference API providers.\n\n"
                            "**How to fix:**\n"
                            "1. See which models are available: https://huggingface.co/inference/models\n"
                            "2. Enable providers (and pick a chat model): https://huggingface.co/settings/inference-api\n"
                            "3. In app.py, set RAGChatbot(model_name=\"your-chosen-model-id\") to match a model you enabled."
                        )
                    # Fallback: return the raw retrieved chunks so the user
                    # still gets something useful despite the generation error.
                    response_parts = []
                    response_parts.append("I retrieved relevant information, but couldn't generate a synthesized answer. Here are the relevant chunks:\n\n")
                    for i, result in enumerate(results, 1):
                        meta = result.get('metadata') or {}
                        source = meta.get('document_title') or meta.get('source', '')
                        text = result['text'].strip()
                        if text:
                            response_parts.append(f"**Relevant information {i}** (from {source}):\n{text}\n")
                    return "\n".join(response_parts)
            except Exception as e:
                print(f"Error in RAG retrieval: {e}")
                return f"I encountered an error while searching the documents: {str(e)}"

        # Non-RAG path: RAG disabled or no vector store loaded.
        try:
            prompt = f"""You are a helpful assistant. Answer the following question concisely.
Question: {query}
Answer:"""
            response_text = self._generate_with_chat(prompt, max_new_tokens=256)
            if response_text:
                return response_text
            return "I couldn't generate a response. Please try again."
        except Exception as e:
            print(f"Error generating response: {e}")
            return f"I encountered an error while generating a response: {str(e)}. Please check your HF_TOKEN configuration."

    def chat(self, message: str, history):
        """
        Handle one chat turn for the Gradio UI.

        Args:
            message: User message; blank/whitespace messages are ignored.
            history: Chat history as a list of {"role", "content"} dicts,
                or None on the first call.

        Returns:
            Tuple of (cleared textbox value, updated history).
        """
        if not message or not message.strip():
            return "", history or []
        # Ensure history is a list.
        if history is None:
            history = []
        # Record the user turn in messages format.
        history.append({"role": "user", "content": message})
        # Generate response (always use RAG when a vector store is available).
        try:
            response = self.generate_response(message, use_rag=True)
            # Never append an empty assistant turn.
            if not response or not response.strip():
                response = "I'm sorry, I couldn't generate a response. Please try again."
        except Exception as e:
            print(f"Error generating response: {e}")
            import traceback
            traceback.print_exc()
            response = f"I encountered an error: {str(e)}"
        # Record the assistant turn in messages format.
        history.append({"role": "assistant", "content": response})
        print(f"Debug - History length: {len(history)}")
        print(f"Debug - Response: {response[:100] if response else 'None'}...")
        return "", history
# Initialize chatbot (module level so all Gradio callbacks share one instance)
chatbot = RAGChatbot()

# Create Gradio interface.
# FIX: the theme must be passed to gr.Blocks(); Blocks.launch() has no
# `theme` parameter, so the custom theme was never applied before.
with gr.Blocks(title="OCO Chatbot", theme=MinimalistTheme()) as app:
    gr.Markdown("OCO Chatbot")

    chatbot_interface = gr.Chatbot(
        label="Chat",
        height=500,
        # FIX: chat() emits {"role", "content"} dicts, i.e. the "messages"
        # format (requires gradio >= 4.44); the default "tuples" format
        # cannot render dict histories.
        type="messages",
        value=[],  # initialize with an empty conversation
    )

    with gr.Row():
        msg = gr.Textbox(
            label="Your Message",
            placeholder="Ask a question about your documents...",
            scale=4,
        )
    with gr.Row():
        submit_btn = gr.Button("Send", variant="primary")
        clear_btn = gr.Button("Clear")

    # Enter in the textbox and the Send button both trigger the same handler.
    msg.submit(
        chatbot.chat,
        inputs=[msg, chatbot_interface],
        outputs=[msg, chatbot_interface],
    )
    submit_btn.click(
        chatbot.chat,
        inputs=[msg, chatbot_interface],
        outputs=[msg, chatbot_interface],
    )

    def clear_chat():
        """Reset the conversation and clear the input box."""
        return [], ""

    clear_btn.click(clear_chat, outputs=[chatbot_interface, msg])
if __name__ == "__main__":
    # Hugging Face Spaces sets PORT; default to Gradio's standard 7860 locally.
    port = int(os.environ.get("PORT", 7860))
    # FIX: removed the invalid `theme=` kwarg — Blocks.launch() does not accept
    # a theme (passing one raises TypeError); themes are set on gr.Blocks().
    app.launch(
        share=False,
        server_name="0.0.0.0",
        server_port=port,
    )