# organic-chatbot / app.py
# Last change: daniel-simeone — "update references" (commit 7e002df)
"""
Gradio app for Hugging Face chatbot with RAG capabilities.

Wires a Gradio Blocks UI to a RAG-enabled chatbot backed by the Hugging Face
Inference API and a local vector store (see ``ingestion.DocumentIngestion``).
"""
import warnings

# Suppress deprecation from dependencies (e.g. accelerate) until they use
# torch.distributed.ReduceOp. Placed before the heavy imports below so the
# filter is already active when those modules load.
warnings.filterwarnings(
    "ignore",
    message=".*torch.distributed.reduce_op.*ReduceOp.*",
    category=FutureWarning,
)
import os
from typing import List, Optional, Tuple

import gradio as gr
from gradio.themes.base import Base
from gradio.themes.utils import colors, fonts, sizes
from huggingface_hub import InferenceClient

from ingestion import DocumentIngestion
# Create a clean minimalist theme
class MinimalistTheme(Base):
    """A clean, minimalist theme with subtle colors and simple styling."""

    def __init__(self):
        # Named font stacks keep the constructor call compact.
        body_fonts = (
            fonts.GoogleFont("Inter"),
            "ui-sans-serif",
            "system-ui",
            "sans-serif",
        )
        mono_fonts = (
            fonts.GoogleFont("JetBrains Mono"),
            "ui-monospace",
            "monospace",
        )
        super().__init__(
            primary_hue=colors.blue,
            secondary_hue=colors.gray,
            neutral_hue=colors.gray,
            spacing_size=sizes.spacing_md,
            radius_size=sizes.radius_sm,
            text_size=sizes.text_md,
            font=body_fonts,
            font_mono=mono_fonts,
        )

        # Theme overrides grouped by concern; applied in a single set() call.
        backgrounds = {
            # Clean backgrounds
            "body_background_fill": "#ffffff",
            "body_background_fill_dark": "#0f0f0f",
            "block_background_fill": "#ffffff",
            "block_background_fill_dark": "#1a1a1a",
        }
        borders = {
            # Subtle borders
            "block_border_width": "1px",
            "block_border_color": "#e0e0e0",
            "block_border_color_dark": "#2a2a2a",
            "block_shadow": "none",
        }
        buttons = {
            # Clean buttons
            "button_primary_background_fill": "#2563eb",
            "button_primary_background_fill_hover": "#1d4ed8",
            "button_primary_text_color": "#ffffff",
            "button_primary_background_fill_dark": "#3b82f6",
            "button_primary_background_fill_hover_dark": "#2563eb",
            "button_secondary_background_fill": "#f3f4f6",
            "button_secondary_background_fill_hover": "#e5e7eb",
            "button_secondary_text_color": "#111827",
            "button_secondary_background_fill_dark": "#374151",
            "button_secondary_background_fill_hover_dark": "#4b5563",
            "button_border_width": "1px",
        }
        inputs = {
            # Input fields
            "input_background_fill": "#ffffff",
            "input_background_fill_dark": "#1a1a1a",
            "input_border_width": "1px",
            "input_border_color": "#d1d5db",
            "input_border_color_dark": "#374151",
        }
        text_colors = {
            # Text colors
            "body_text_color": "#111827",
            "body_text_color_dark": "#e5e7eb",
            "block_label_text_color": "#374151",
            "block_label_text_color_dark": "#9ca3af",
        }
        super().set(**backgrounds, **borders, **buttons, **inputs, **text_colors)
class RAGChatbot:
    """Chatbot that answers questions with retrieval-augmented generation (RAG).

    Retrieval is backed by the FAISS-based ``DocumentIngestion`` vector store;
    generation goes through the Hugging Face Inference API, trying a list of
    chat models in order until one is accepted by the configured providers.
    """

    # Default and fallback models (tried in order until one is supported by
    # your Inference API providers).
    DEFAULT_CHAT_MODEL = "Qwen/Qwen2.5-Coder-7B-Instruct"
    FALLBACK_CHAT_MODELS = [
        "ServiceNow-AI/Apriel-1.6-15b-Thinker:together",
        "microsoft/phi-2",
        "HuggingFaceH4/zephyr-7b-beta",
    ]

    def __init__(
        self,
        model_name: Optional[str] = None,
        embedding_model: str = "all-mpnet-base-v2",
        vector_store_path: str = "data/vector_store",
    ):
        """
        Initialize the RAG chatbot.

        Args:
            model_name: Hugging Face model name for the chatbot (via Inference
                API). Falls back to ``DEFAULT_CHAT_MODEL`` when None/empty.
            embedding_model: Model used for document embeddings.
            vector_store_path: Directory containing a previously saved vector
                store (expects an ``index.faiss`` file inside).
        """
        self.model_name = model_name if model_name else self.DEFAULT_CHAT_MODEL
        # Ordered list of models to try: primary first, then any fallbacks
        # that are not already the primary.
        self._models_to_try = [self.model_name] + [
            m for m in self.FALLBACK_CHAT_MODELS if m != self.model_name
        ]

        # Initialize the Inference API client without a fixed model so the
        # same client can be reused while trying fallback models per request.
        hf_token = os.environ.get("HF_TOKEN")
        # Debug: report HF_TOKEN status, masked so the secret never hits logs.
        if not hf_token:
            print("[DEBUG] HF_TOKEN: not set (empty or missing)")
            print("Warning: HF_TOKEN not set. Inference API calls may fail.")
            print("Set HF_TOKEN environment variable or add it to Space secrets.")
        else:
            masked = f"{hf_token[:4]}...{hf_token[-4:]}" if len(hf_token) > 8 else "****"
            print(f"[DEBUG] HF_TOKEN: set (length={len(hf_token)}, masked={masked})")
            print("HF_TOKEN found. Inference API ready.")
        print(f"[DEBUG] Inference API client (models to try: {self._models_to_try})")
        try:
            self.inference_client = InferenceClient(token=hf_token)
            print("[DEBUG] Inference API client initialized (model chosen per request with fallbacks)")
        except Exception as e:
            # Keep the app alive; generate_response reports the failure to the user.
            print(f"[DEBUG] Error initializing Inference API client: {type(e).__name__}: {e}")
            self.inference_client = None

        # Document ingestion handles embeddings and vector search.
        self.ingestion = DocumentIngestion(embedding_model=embedding_model)

        # Load an existing vector store if one has been built previously.
        if os.path.exists(vector_store_path) and os.path.exists(
            os.path.join(vector_store_path, "index.faiss")
        ):
            try:
                self.ingestion.load(vector_store_path)
                print("Loaded existing vector store")
            except Exception as e:
                print(f"Could not load vector store: {e}")

        # NOTE(review): appears unused within this class; kept for backward
        # compatibility with any external callers.
        self.chat_history = []

    def _generate_with_chat(self, user_content: str, max_new_tokens: int = 512) -> str:
        """Call the Inference API via ``chat_completion``, with model fallbacks.

        Args:
            user_content: Full prompt, sent as a single user message.
            max_new_tokens: Token budget for the completion.

        Returns:
            The stripped assistant message, or "" when every model returned an
            empty/unexpected response without raising.

        Raises:
            Exception: the last availability error if no model was supported,
                or the first non-availability error encountered.
        """
        last_error = None
        for model in self._models_to_try:
            print(f"[DEBUG] _generate_with_chat: trying model={model}, prompt_len={len(user_content)}, max_tokens={max_new_tokens}")
            try:
                response = self.inference_client.chat_completion(
                    model=model,
                    messages=[{"role": "user", "content": user_content}],
                    max_tokens=max_new_tokens,
                    temperature=0.7,
                )
                print(f"[DEBUG] chat_completion OK for model={model}, response type: {type(response).__name__}")
                if response and response.choices and len(response.choices) > 0:
                    msg = response.choices[0].message
                    if hasattr(msg, "content") and msg.content:
                        # Promote the working model to the front for next time.
                        self.model_name = model
                        self._models_to_try = [model] + [m for m in self._models_to_try if m != model]
                        return msg.content.strip()
                # Fall through to the next model on an empty/odd response.
                print("[DEBUG] chat_completion returned empty or unexpected structure")
            except Exception as e:
                last_error = e
                err_str = str(e).lower()
                # Availability errors -> try the next fallback model.
                if "model_not_supported" in err_str or "not supported by any provider" in err_str:
                    print(f"[DEBUG] Model {model} not available, trying next fallback.")
                    continue
                # Anything else (auth, network, bad request) cannot be fixed by
                # switching models, so surface it immediately.
                print(f"[DEBUG] _generate_with_chat exception for {model}: {type(e).__name__}: {e}")
                import traceback
                traceback.print_exc()
                raise
        if last_error is not None:
            raise last_error
        return ""

    def generate_response(self, query: str, use_rag: bool = True, num_results: int = 5) -> str:
        """
        Generate a response to the user query using RAG and the Inference API.

        Args:
            query: User's question.
            use_rag: Whether to retrieve relevant document chunks as context.
            num_results: Number of document chunks to retrieve.

        Returns:
            Generated response text (user-facing error text on failure).
        """
        if self.inference_client is None:
            return "Error: Inference API client not initialized. Please check HF_TOKEN configuration."

        # RAG path: retrieve context chunks and ask the model to answer from them.
        if use_rag and self.ingestion.index is not None:
            try:
                results = self.ingestion.search(query, k=num_results)
                if not results:
                    return "I couldn't find any relevant information in the documents to answer your question. Please try rephrasing or check if the documents contain information about this topic."

                # Build the context block and the index -> source-label mapping
                # in ONE pass, so the References section only lists chunks that
                # actually made it into the prompt. (Previously a second loop
                # over ALL results also recorded chunks whose empty text had
                # been skipped, producing dangling references.)
                context_parts = []
                context_index_to_source = {}
                for i, result in enumerate(results, 1):
                    text = result['text'].strip()
                    if not text:
                        continue  # skip empty chunks entirely
                    meta = result.get('metadata') or {}
                    source_label = meta.get('document_title') or meta.get('source') or f"Source {i}"
                    context_index_to_source[i] = source_label
                    context_parts.append(f"[Context {i}] (Source: {source_label})\n{text}")

                if not context_parts:
                    # Every retrieved chunk was empty: treat as "nothing found"
                    # instead of prompting the model with an empty context.
                    return "I couldn't find any relevant information in the documents to answer your question. Please try rephrasing or check if the documents contain information about this topic."

                context = "\n\n".join(context_parts)

                # Instruction-tuned prompt: answer strictly from the context
                # and cite sources by their labels.
                prompt = f"""
*You are an expert assistant specializing in organic farming, in particular in Canada and its legal context.
Answer the user's question using only the information provided in the context.
If the context does not include the information needed to answer the question, clearly say:
"The provided context does not contain enough information to answer this question."
When answering:
Respond in English only.
Do not use outside knowledge, assumptions, or guesswork.
Cite or reference the specific parts of the context your answer is based on.
Provide concise, accurate, and helpful explanations.
Do not reveal your internal reasoning. Provide only the final answer.
Structure your answer in the following format:
Summary — A brief, high‑level answer.
Supporting Details — Explain using information only from the provided context. When citing, use the Source label shown for that context (e.g. the document title or name in parentheses after [Context N]).
Context References — List each reference with the exact Source shown for that context (e.g. "CAN/CGSB-32.312-2018" or the document title). Include section name or page when that information appears in the context text. Format: document/source, section or location if available, and a short quote or paraphrase. Do not use only "Context 1" or "Context 5" as the reference; always include the document title/source.
Context:
{context}
Question: {query}
Answer:"""

                # Generate the answer using the chat/conversational API.
                try:
                    response_text = self._generate_with_chat(prompt, max_new_tokens=512)
                    if response_text:
                        # Resolve [Context N] markers in the body to their source labels.
                        for i, source_label in context_index_to_source.items():
                            response_text = response_text.replace(
                                f"[Context {i}]",
                                f"({source_label})",
                            )
                        # Append a References section so users see what each source is.
                        ref_lines = [
                            "",
                            "---",
                            "**References**",
                        ]
                        for i, source_label in context_index_to_source.items():
                            ref_lines.append(f"{i}. {source_label}")
                        response_text = response_text.rstrip() + "\n\n" + "\n".join(ref_lines)
                        return response_text
                    raise ValueError("Empty response from model")
                except Exception as api_error:
                    print(f"[DEBUG] RAG generation failed: {type(api_error).__name__}: {api_error}")
                    err_str = str(api_error).lower()
                    if "model_not_supported" in err_str or "not supported by any provider" in err_str:
                        return (
                            "None of the configured chat models are available with your Inference API providers.\n\n"
                            "**How to fix:**\n"
                            "1. See which models are available: https://huggingface.co/inference/models\n"
                            "2. Enable providers (and pick a chat model): https://huggingface.co/settings/inference-api\n"
                            "3. In app.py, set RAGChatbot(model_name=\"your-chosen-model-id\") to match a model you enabled."
                        )
                    # Fallback: return the raw retrieved chunks so the user
                    # still gets something useful despite the generation error.
                    response_parts = []
                    response_parts.append("I retrieved relevant information, but couldn't generate a synthesized answer. Here are the relevant chunks:\n\n")
                    for i, result in enumerate(results, 1):
                        meta = result.get('metadata') or {}
                        source = meta.get('document_title') or meta.get('source', '')
                        text = result['text'].strip()
                        if text:
                            response_parts.append(f"**Relevant information {i}** (from {source}):\n{text}\n")
                    return "\n".join(response_parts)
            except Exception as e:
                print(f"Error in RAG retrieval: {e}")
                return f"I encountered an error while searching the documents: {str(e)}"

        # Non-RAG path: RAG disabled or no vector store loaded.
        try:
            prompt = f"""You are a helpful assistant. Answer the following question concisely.
Question: {query}
Answer:"""
            response_text = self._generate_with_chat(prompt, max_new_tokens=256)
            if response_text:
                return response_text
            return "I couldn't generate a response. Please try again."
        except Exception as e:
            print(f"Error generating response: {e}")
            return f"I encountered an error while generating a response: {str(e)}. Please check your HF_TOKEN configuration."

    def chat(self, message: str, history):
        """
        Handle one chat turn for the Gradio UI.

        Args:
            message: User message; blank/whitespace messages are ignored.
            history: Chat history as a list of {"role", "content"} dicts,
                or None on the first call.

        Returns:
            Tuple of (cleared textbox value, updated history).
        """
        if not message or not message.strip():
            return "", history or []
        # Ensure history is a list.
        if history is None:
            history = []
        # Record the user turn in messages format.
        history.append({"role": "user", "content": message})
        # Generate response (always use RAG when a vector store is available).
        try:
            response = self.generate_response(message, use_rag=True)
            # Never append an empty assistant turn.
            if not response or not response.strip():
                response = "I'm sorry, I couldn't generate a response. Please try again."
        except Exception as e:
            print(f"Error generating response: {e}")
            import traceback
            traceback.print_exc()
            response = f"I encountered an error: {str(e)}"
        # Record the assistant turn in messages format.
        history.append({"role": "assistant", "content": response})
        print(f"Debug - History length: {len(history)}")
        print(f"Debug - Response: {response[:100] if response else 'None'}...")
        return "", history
# Initialize chatbot (module level so all Gradio callbacks share one instance)
chatbot = RAGChatbot()

# Create Gradio interface.
# FIX: the theme must be passed to gr.Blocks(); Blocks.launch() has no
# `theme` parameter, so the custom theme was never applied before.
with gr.Blocks(title="OCO Chatbot", theme=MinimalistTheme()) as app:
    gr.Markdown("OCO Chatbot")

    chatbot_interface = gr.Chatbot(
        label="Chat",
        height=500,
        # FIX: chat() emits {"role", "content"} dicts, i.e. the "messages"
        # format (requires gradio >= 4.44); the default "tuples" format
        # cannot render dict histories.
        type="messages",
        value=[],  # initialize with an empty conversation
    )

    with gr.Row():
        msg = gr.Textbox(
            label="Your Message",
            placeholder="Ask a question about your documents...",
            scale=4,
        )
    with gr.Row():
        submit_btn = gr.Button("Send", variant="primary")
        clear_btn = gr.Button("Clear")

    # Enter in the textbox and the Send button both trigger the same handler.
    msg.submit(
        chatbot.chat,
        inputs=[msg, chatbot_interface],
        outputs=[msg, chatbot_interface],
    )
    submit_btn.click(
        chatbot.chat,
        inputs=[msg, chatbot_interface],
        outputs=[msg, chatbot_interface],
    )

    def clear_chat():
        """Reset the conversation and clear the input box."""
        return [], ""

    clear_btn.click(clear_chat, outputs=[chatbot_interface, msg])
if __name__ == "__main__":
    # Hugging Face Spaces sets PORT; default to Gradio's standard 7860 locally.
    port = int(os.environ.get("PORT", 7860))
    # FIX: removed the invalid `theme=` kwarg — Blocks.launch() does not accept
    # a theme (passing one raises TypeError); themes are set on gr.Blocks().
    app.launch(
        share=False,
        server_name="0.0.0.0",
        server_port=port,
    )