# Stecu-RAG / app.py
# Uploaded by firman-ml ("Update app.py", commit f713f55, verified)
import os
from huggingface_hub import InferenceClient
import gradio as gr
from langchain_community.document_loaders import PyPDFLoader
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain_community.vectorstores import Chroma
from langchain_huggingface import HuggingFaceEmbeddings
import time
from datetime import datetime
# Load environment variables
print("πŸš€ Starting Stecu RAG Chatbot...")
print("=" * 60)
print(f"⏰ Initialization started at: {datetime.now().strftime('%Y-%m-%d %H:%M:%S')}")
print("=" * 60)

# Step 1: read configuration from the environment.
print("\nπŸ“‹ Step 1: Loading environment variables...")
# Get HF_TOKEN from environment (Hugging Face Spaces automatically provides this)
HF_TOKEN = os.getenv("HF_TOKEN")
print("βœ… Hugging Face token found in environment" if HF_TOKEN
      else "❌ Warning: HF_TOKEN not found in environment variables")

# Step 2: build the hosted-inference client used later by respond().
print("\nπŸ€– Step 2: Initializing Hugging Face InferenceClient...")
try:
    client = InferenceClient(token=HF_TOKEN)
except Exception as exc:
    print(f"❌ Error initializing InferenceClient: {exc}")
else:
    print("βœ… InferenceClient initialized successfully")
    print(f" Using model: mistralai/Mistral-7B-Instruct-v0.3")
# Load and process the Scrum Guide PDF
def load_knowledge_base():
    """Build an in-memory Chroma vector store from the bundled Scrum Guide PDF.

    Returns the populated vector store, or ``None`` when the PDF is missing
    or any processing stage (load, split, embed) fails.
    """
    print("\nπŸ“š Step 3: Loading and processing Scrum Guide PDF...")

    pdf_path = "Scrum Guide.pdf"
    if not os.path.exists(pdf_path):
        print(f"❌ Error: '{pdf_path}' not found in current directory")
        print(" Please make sure the Scrum Guide PDF is in the same folder as this script")
        return None
    print(f"βœ… Found PDF file: {pdf_path}")
    print(f" File size: {os.path.getsize(pdf_path) / 1024:.1f} KB")

    # Stage 1: read every page out of the PDF.
    print("\nπŸ“– Step 3a: Loading PDF content...")
    t0 = time.time()
    try:
        documents = PyPDFLoader(pdf_path).load()
        print(f"βœ… PDF loaded successfully in {time.time() - t0:.2f} seconds")
        print(f" Total pages: {len(documents)}")
        print(f" Total characters: {sum(len(doc.page_content) for doc in documents):,}")
    except Exception as exc:
        print(f"❌ Error loading PDF: {exc}")
        return None

    # Stage 2: split the pages into small, overlapping retrieval chunks.
    print("\nβœ‚οΈ Step 3b: Splitting documents into chunks...")
    t0 = time.time()
    splitter = RecursiveCharacterTextSplitter(
        chunk_size=600,    # Smaller chunks for focused retrieval
        chunk_overlap=80,  # Minimal overlap to reduce duplication
        separators=["\n\n", "\n", ". ", "! ", "? ", ", ", " ", ""],  # Better splitting
    )
    chunks = splitter.split_documents(documents)
    sizes = [len(chunk.page_content) for chunk in chunks]
    print(f"βœ… Document chunking completed in {time.time() - t0:.2f} seconds")
    print(f" Total chunks created: {len(chunks)}")
    print(f" Average chunk size: {sum(sizes) // len(sizes)} characters")
    print(f" Chunk size range: {min(sizes)} - {max(sizes)} characters")

    # Stage 3: embed every chunk and load the vectors into Chroma.
    print("\n🧠 Step 3c: Creating embeddings and vector database...")
    print(" This may take a few minutes depending on your hardware...")
    t0 = time.time()
    try:
        print(" πŸ“₯ Downloading embedding model: sentence-transformers/all-MiniLM-L6-v2")
        embedder = HuggingFaceEmbeddings(model_name="sentence-transformers/all-MiniLM-L6-v2")
        print(" βœ… Embedding model loaded successfully")
        print(" πŸ”„ Generating embeddings for all chunks...")
        vectorstore = Chroma.from_documents(chunks, embedder)
        print(f"βœ… Vector database created successfully in {time.time() - t0:.2f} seconds")
        print(f" Vector database contains {len(chunks)} document embeddings")
        print(f" Embedding model dimensions: 384 (MiniLM-L6-v2)")
    except Exception as exc:
        print(f"❌ Error creating embeddings: {exc}")
        return None

    return vectorstore
def clean_response(response):
    """Strip chat-template artifacts and filler phrasing from a raw completion.

    Returns the cleaned, whitespace-normalised answer string.
    """
    # Known template fragments and boilerplate lead-ins to delete outright.
    for junk in ["[/ASS]", "</s>", "[/INST]", "[/", "Human:", "User:",
                 "Assistant:", "Context:", "Instructions:", "Stecu:",
                 "In Scrum,", "During the Sprint", "Here's", "Here is"]:
        response = response.replace(junk, "")

    # A colon near the start usually marks a leftover speaker label; keep
    # only what follows the first colon.
    if ":" in response[:20]:
        _, _, tail = response.partition(":")
        response = tail.strip()

    # Collapse runs of whitespace and drop stray bracket characters.
    response = " ".join(response.split())
    response = response.replace("[", "").replace("]", "")

    # Unwrap a fully quoted answer.
    if response.startswith('"') and response.endswith('"'):
        response = response[1:-1]

    # Trim filler openings the model likes to prepend (checked in sequence,
    # so multiple matching prefixes can be peeled off).
    for opener in ["In Scrum,", "During the Sprint,", "The answer is",
                   "Well,", "So,", "Basically,"]:
        if response.startswith(opener):
            response = response[len(opener):].strip()

    # Drop one dangling conjunction left behind by token truncation.
    if response and len(response) > 10:
        for dangler in [" and", " or", " but", " which", " that", " where",
                        " when", " who", " what", " how"]:
            if response.endswith(dangler):
                response = response[:-len(dangler)].strip()
                break

    return response.strip()
def get_question_intent(message):
    """Classify *message* into a coarse intent used to steer the answer style.

    Returns one of: "duration", "definition", "process", "rationale",
    "roles", or "general".  Matching is case-insensitive substring search.
    """
    message_lower = message.lower()
    # BUGFIX: duration must be checked FIRST.  Phrases like "how long" or
    # "what is the duration" were previously swallowed by the broader "how"
    # (process) and "what is" (definition) checks, so the duration branch
    # was effectively unreachable for the very questions it was added for.
    if any(word in message_lower for word in ["how long", "duration", "time", "minutes", "hours", "days", "weeks", "length"]):
        return "duration"
    elif any(word in message_lower for word in ["what is", "define", "explain", "tell me about"]):
        return "definition"
    elif any(word in message_lower for word in ["how", "how to", "process", "steps"]):
        return "process"
    elif any(word in message_lower for word in ["why", "benefit", "advantage", "purpose"]):
        return "rationale"
    elif any(word in message_lower for word in ["who", "role", "responsibility"]):
        return "roles"
    else:
        return "general"
def is_scrum_related(message, contexts):
    """Heuristic gate: does the question (or its retrieved context) mention Scrum?

    Returns ``False`` immediately when no context was retrieved; otherwise
    ``True`` if the message or any sufficiently long passage contains a
    Scrum keyword.
    """
    if not contexts:
        return False
    # ENHANCED: Added duration-related keywords
    keywords = ["scrum", "sprint", "product owner", "scrum master", "developer",
                "backlog", "retrospective", "review", "daily", "planning",
                "duration", "time", "minutes", "hours", "weeks"]
    lowered = message.lower()
    if any(kw in lowered for kw in keywords):
        return True
    # Fall back to the retrieved passages: a passage longer than 50 chars
    # that mentions any keyword counts as on-topic.
    return any(
        len(passage) > 50 and any(kw in passage.lower() for kw in keywords)
        for passage in contexts
    )
def respond(message, history):
    """Gradio chat callback: answer *message* using retrieved Scrum Guide text.

    Retrieves relevant chunks from the module-level ``vectorstore``, builds a
    context-grounded prompt, and asks the hosted Mistral model (up to 3
    attempts).  *history* is supplied by Gradio but unused.  Always returns a
    plain string reply.
    """
    # Guard: the knowledge base failed to load at startup.
    if vectorstore is None:
        return "I apologize, but I can only answer questions based on the Scrum Guide PDF. Please make sure the PDF is loaded properly."
    # Canned reply for "who are you?"-style questions -- no retrieval needed.
    identity_keywords = ["who are you", "what are you", "introduce yourself", "tell me about yourself", "your name"]
    if any(keyword in message.lower() for keyword in identity_keywords):
        return "Hi! I'm Stecu, your Scrum coach. I can help you learn about Scrum by answering questions based on the official Scrum Guide."
    # Canned reply for short greetings/thanks; the length cap avoids matching
    # real questions that merely contain "hi" etc. as a substring.
    greeting_keywords = ["hello", "hi", "hey", "good morning", "good afternoon", "good evening", "thanks", "thank you"]
    if any(keyword in message.lower().strip() for keyword in greeting_keywords) and len(message.strip()) < 25:
        return "Hello! I'm Stecu, your Scrum coach. Feel free to ask me any questions about Scrum and I'll explain them using the official Scrum Guide."
    try:
        # ENHANCED: Increase retrieval count for better context coverage
        retriever = vectorstore.as_retriever(search_kwargs={"k": 8})
        relevant_docs = retriever.invoke(message)
    except Exception as e:
        print(f"Error retrieving documents: {e}")
        return "I'm having trouble accessing the Scrum Guide content. Please try again."
    # De-duplicate retrieved chunks, keying on their first 80 characters.
    contexts = []
    seen_content = set()
    for doc in relevant_docs:
        content = doc.page_content.strip()
        content_key = content[:80].lower()
        # RELAXED: Reduce minimum content length for better duration info capture
        if content_key not in seen_content and len(content) > 15:
            seen_content.add(content_key)
            contexts.append(content)
    # Refuse off-topic questions (and empty retrievals) with the same message.
    if not is_scrum_related(message, contexts):
        return "I can only answer questions about Scrum based on the official Scrum Guide. Please ask me about Scrum concepts, roles, events, artifacts, or processes."
    if not contexts:
        return "I can only answer questions about Scrum based on the official Scrum Guide. Please ask me about Scrum concepts, roles, events, artifacts, or processes."
    # ENHANCED: Use more contexts for better information coverage
    combined_context = "\n\n".join(contexts[:5])
    # Tailor the instruction line to the detected question type.
    intent = get_question_intent(message)
    if intent == "definition":
        instruction_focus = "Provide a clear, concise definition based on the Scrum Guide."
    elif intent == "process":
        instruction_focus = "Explain the key steps or process as described in the Scrum Guide."
    elif intent == "roles":
        instruction_focus = "Explain the responsibilities as defined in the Scrum Guide."
    elif intent == "duration":
        instruction_focus = "Provide the specific duration, time, or length mentioned in the Scrum Guide."
    else:
        instruction_focus = "Answer the question based on the Scrum Guide information."
    # ENHANCED: Improved system prompt for better duration handling
    system_prompt = (
        "You are Stecu, a Scrum coach. You must answer the user's question using ONLY the provided 'Context from Scrum Guide PDF' below. Do not use any external knowledge. "
        "Your answer should be helpful, conversational, and 1-3 sentences long. "
        "If asked about durations, times, or lengths, look carefully in the context for specific time measurements (minutes, hours, days, weeks) and provide them exactly as stated.\n\n"
        "If the provided context does not contain enough information to answer the question, you MUST reply with the single sentence: 'I could not find an answer to your question in the provided text.' Do not add any other information.\n\n"
        f"Instruction: {instruction_focus}\n\n"
        "Context from Scrum Guide PDF:\n"
        f"'{combined_context}'\n\n"
    )
    messages = [
        {"role": "system", "content": system_prompt},
        {"role": "user", "content": message}
    ]
    # Up to 3 attempts: retry both on API errors and on answers that look
    # like they were drawn from outside the provided context.
    for attempt in range(3):
        try:
            completion = client.chat.completions.create(
                model="mistralai/Mistral-7B-Instruct-v0.3",
                messages=messages,
                max_tokens=120,  # Slightly increased for duration explanations
                temperature=0.2,
                top_p=0.9,
                stop=["[/INST]", "</s>", "\n\n", "Human:", "User:", "Assistant:", "Context:"]
            )
            response = completion.choices[0].message.content
            response = clean_response(response)
            # Phrases suggesting the model ignored the context grounding.
            external_knowledge_indicators = [
                "i know that", "generally speaking", "typically", "usually", "in my experience",
                "from what I understand", "as far as I know", "it's common", "normally"
            ]
            response_lower = response.lower()
            if any(indicator in response_lower for indicator in external_knowledge_indicators):
                continue
            # Accept only non-trivial answers; ensure a closing full stop.
            if response and len(response) > 10:
                if not response.endswith('.'):
                    response += "."
                return response
        except Exception as e:
            print(f"Attempt {attempt + 1} failed: {e}")
            continue
    return "I can only provide answers based on the Scrum Guide PDF. Please try asking your question in a different way."
# Initialize the vectorstore
print("\n🎯 Starting knowledge base initialization...")
vectorstore = load_knowledge_base()
if vectorstore is None:
    print("\n❌ Failed to initialize knowledge base.")

    def respond_fallback(message, history):
        # Replaces respond() so the UI still answers instead of crashing.
        return "I apologize, but the Scrum Guide PDF is not available. Please ensure the PDF file is uploaded to this Space."

    respond = respond_fallback
else:
    # BUGFIX: the success banner previously printed unconditionally, so the
    # log claimed success even right after the failure message above.
    print("\nπŸŽ‰ Knowledge base initialization completed successfully!")
print("=" * 60)
print(f"⏰ Initialization completed at: {datetime.now().strftime('%Y-%m-%d %H:%M:%S')}")
print("=" * 60)
print("\n🌐 Step 4: Setting up Gradio interface...")
# Create the Gradio ChatInterface optimized for Hugging Face Spaces
chat_interface = gr.ChatInterface(
fn=respond,
title="πŸƒβ€β™‚οΈ Stecu: Scrum Teaching Chatbot Unit",
description="Hi! I'm Stecu, your Scrum coach. Ask me anything about Scrum and I'll explain it in simple terms based on the official Scrum Guide. Perfect for beginners and experienced practitioners alike!",
type="messages",
examples=[
"What is Scrum?",
"What are the main Scrum roles?",
"How does a Sprint work?",
"What's the difference between Scrum Master and Product Owner?",
"What happens in a Daily Scrum?",
"How do you plan a Sprint?",
"What is a Product Backlog?",
"Why use Scrum?",
"What is a Sprint Review?",
"What is a Sprint Retrospective?",
"How long is Sprint Planning?",
"What is the duration of a Daily Scrum?",
"How long can a Sprint last?"
],
theme=gr.themes.Soft(
primary_hue="blue",
secondary_hue="purple",
neutral_hue="gray",
),
css="""
.gradio-container {
max-width: 1000px;
margin: 0 auto;
font-family: 'Inter', sans-serif;
}
.chat-message {
padding: 16px;
border-radius: 12px;
margin: 12px 0;
box-shadow: 0 2px 8px rgba(0,0,0,0.1);
}
.user-message {
background: linear-gradient(135deg, #667eea 0%, #764ba2 100%);
color: white;
margin-left: 20%;
}
.bot-message {
background: linear-gradient(135deg, #f093fb 0%, #f5576c 100%);
color: white;
margin-right: 20%;
}
.title {
text-align: center;
color: #2c3e50;
font-size: 2.5em;
margin-bottom: 10px;
}
.description {
text-align: center;
color: #34495e;
font-size: 1.2em;
margin-bottom: 30px;
}
""",
chatbot=gr.Chatbot(
height=600,
show_label=False,
container=True,
scale=1,
type="messages"
),
)
print("βœ… Gradio interface configured successfully")
# Launch the interface
if __name__ == "__main__":
    print("\nπŸš€ Step 5: Launching web interface...")
    print("=" * 60)
    print("🌟 Stecu RAG Chatbot is ready!")
    print("=" * 60)
    # Launch configuration optimized for Hugging Face Spaces
    chat_interface.launch(
        server_name="0.0.0.0",  # bind all interfaces so the Space proxy can reach it
        server_port=7860,       # the port Hugging Face Spaces exposes
        share=False,            # no gradio.live tunnel needed inside Spaces
        debug=False,
        show_error=True,        # surface handler exceptions in the UI
        show_api=False,
        quiet=False
    )