# Stecu-RAG / app.py
# Uploaded by firman-ml ("Update app.py", commit f713f55, verified)
import os
from huggingface_hub import InferenceClient
import gradio as gr
from langchain_community.document_loaders import PyPDFLoader
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain_community.vectorstores import Chroma
from langchain_huggingface import HuggingFaceEmbeddings
import time
from datetime import datetime
# Load environment variables
print("πŸš€ Starting Stecu RAG Chatbot...")
print("=" * 60)
print(f"⏰ Initialization started at: {datetime.now().strftime('%Y-%m-%d %H:%M:%S')}")
print("=" * 60)

# Step 1: read configuration from the environment.
print("\nπŸ“‹ Step 1: Loading environment variables...")
# Get HF_TOKEN from environment (Hugging Face Spaces automatically provides this)
HF_TOKEN = os.getenv("HF_TOKEN")
print("βœ… Hugging Face token found in environment" if HF_TOKEN
      else "❌ Warning: HF_TOKEN not found in environment variables")

# Step 2: build the hosted-inference client used later by respond().
print("\nπŸ€– Step 2: Initializing Hugging Face InferenceClient...")
try:
    client = InferenceClient(token=HF_TOKEN)
except Exception as exc:
    print(f"❌ Error initializing InferenceClient: {exc}")
else:
    print("βœ… InferenceClient initialized successfully")
    print(f" Using model: mistralai/Mistral-7B-Instruct-v0.3")
# Load and process the Scrum Guide PDF
def load_knowledge_base():
    """Build an in-memory Chroma vector store from the bundled Scrum Guide PDF.

    Returns the populated vector store, or ``None`` when the PDF is missing
    or any processing stage (load, split, embed) fails.
    """
    print("\nπŸ“š Step 3: Loading and processing Scrum Guide PDF...")

    pdf_path = "Scrum Guide.pdf"
    if not os.path.exists(pdf_path):
        print(f"❌ Error: '{pdf_path}' not found in current directory")
        print(" Please make sure the Scrum Guide PDF is in the same folder as this script")
        return None
    print(f"βœ… Found PDF file: {pdf_path}")
    print(f" File size: {os.path.getsize(pdf_path) / 1024:.1f} KB")

    # Stage 1: read every page out of the PDF.
    print("\nπŸ“– Step 3a: Loading PDF content...")
    t0 = time.time()
    try:
        documents = PyPDFLoader(pdf_path).load()
        print(f"βœ… PDF loaded successfully in {time.time() - t0:.2f} seconds")
        print(f" Total pages: {len(documents)}")
        print(f" Total characters: {sum(len(doc.page_content) for doc in documents):,}")
    except Exception as exc:
        print(f"❌ Error loading PDF: {exc}")
        return None

    # Stage 2: split the pages into small, overlapping retrieval chunks.
    print("\nβœ‚οΈ Step 3b: Splitting documents into chunks...")
    t0 = time.time()
    splitter = RecursiveCharacterTextSplitter(
        chunk_size=600,    # Smaller chunks for focused retrieval
        chunk_overlap=80,  # Minimal overlap to reduce duplication
        separators=["\n\n", "\n", ". ", "! ", "? ", ", ", " ", ""],  # Better splitting
    )
    chunks = splitter.split_documents(documents)
    sizes = [len(chunk.page_content) for chunk in chunks]
    print(f"βœ… Document chunking completed in {time.time() - t0:.2f} seconds")
    print(f" Total chunks created: {len(chunks)}")
    print(f" Average chunk size: {sum(sizes) // len(sizes)} characters")
    print(f" Chunk size range: {min(sizes)} - {max(sizes)} characters")

    # Stage 3: embed every chunk and load the vectors into Chroma.
    print("\n🧠 Step 3c: Creating embeddings and vector database...")
    print(" This may take a few minutes depending on your hardware...")
    t0 = time.time()
    try:
        print(" πŸ“₯ Downloading embedding model: sentence-transformers/all-MiniLM-L6-v2")
        embedder = HuggingFaceEmbeddings(model_name="sentence-transformers/all-MiniLM-L6-v2")
        print(" βœ… Embedding model loaded successfully")
        print(" πŸ”„ Generating embeddings for all chunks...")
        vectorstore = Chroma.from_documents(chunks, embedder)
        print(f"βœ… Vector database created successfully in {time.time() - t0:.2f} seconds")
        print(f" Vector database contains {len(chunks)} document embeddings")
        print(f" Embedding model dimensions: 384 (MiniLM-L6-v2)")
    except Exception as exc:
        print(f"❌ Error creating embeddings: {exc}")
        return None

    return vectorstore
def clean_response(response):
    """Strip chat-template artifacts and filler phrasing from a raw completion.

    Returns the cleaned, whitespace-normalised answer string.
    """
    # Known template fragments and boilerplate lead-ins to delete outright.
    for junk in ["[/ASS]", "</s>", "[/INST]", "[/", "Human:", "User:",
                 "Assistant:", "Context:", "Instructions:", "Stecu:",
                 "In Scrum,", "During the Sprint", "Here's", "Here is"]:
        response = response.replace(junk, "")

    # A colon near the start usually marks a leftover speaker label; keep
    # only what follows the first colon.
    if ":" in response[:20]:
        _, _, tail = response.partition(":")
        response = tail.strip()

    # Collapse runs of whitespace and drop stray bracket characters.
    response = " ".join(response.split())
    response = response.replace("[", "").replace("]", "")

    # Unwrap a fully quoted answer.
    if response.startswith('"') and response.endswith('"'):
        response = response[1:-1]

    # Trim filler openings the model likes to prepend (checked in sequence,
    # so multiple matching prefixes can be peeled off).
    for opener in ["In Scrum,", "During the Sprint,", "The answer is",
                   "Well,", "So,", "Basically,"]:
        if response.startswith(opener):
            response = response[len(opener):].strip()

    # Drop one dangling conjunction left behind by token truncation.
    if response and len(response) > 10:
        for dangler in [" and", " or", " but", " which", " that", " where",
                        " when", " who", " what", " how"]:
            if response.endswith(dangler):
                response = response[:-len(dangler)].strip()
                break

    return response.strip()
def get_question_intent(message):
    """Classify *message* into a coarse intent used to steer the answer style.

    Returns one of: "duration", "definition", "process", "rationale",
    "roles", or "general".  Matching is case-insensitive substring search.
    """
    message_lower = message.lower()
    # BUGFIX: duration must be checked FIRST.  Phrases like "how long" or
    # "what is the duration" were previously swallowed by the broader "how"
    # (process) and "what is" (definition) checks, so the duration branch
    # was effectively unreachable for the very questions it was added for.
    if any(word in message_lower for word in ["how long", "duration", "time", "minutes", "hours", "days", "weeks", "length"]):
        return "duration"
    elif any(word in message_lower for word in ["what is", "define", "explain", "tell me about"]):
        return "definition"
    elif any(word in message_lower for word in ["how", "how to", "process", "steps"]):
        return "process"
    elif any(word in message_lower for word in ["why", "benefit", "advantage", "purpose"]):
        return "rationale"
    elif any(word in message_lower for word in ["who", "role", "responsibility"]):
        return "roles"
    else:
        return "general"
def is_scrum_related(message, contexts):
    """Heuristic gate: does the question (or its retrieved context) mention Scrum?

    Returns ``False`` immediately when no context was retrieved; otherwise
    ``True`` if the message or any sufficiently long passage contains a
    Scrum keyword.
    """
    if not contexts:
        return False
    # ENHANCED: Added duration-related keywords
    keywords = ["scrum", "sprint", "product owner", "scrum master", "developer",
                "backlog", "retrospective", "review", "daily", "planning",
                "duration", "time", "minutes", "hours", "weeks"]
    lowered = message.lower()
    if any(kw in lowered for kw in keywords):
        return True
    # Fall back to the retrieved passages: a passage longer than 50 chars
    # that mentions any keyword counts as on-topic.
    return any(
        len(passage) > 50 and any(kw in passage.lower() for kw in keywords)
        for passage in contexts
    )
def respond(message, history):
    """Gradio chat callback: answer *message* using retrieved Scrum Guide text.

    Retrieves relevant chunks from the module-level ``vectorstore``, builds a
    context-grounded prompt, and asks the hosted Mistral model (up to 3
    attempts).  *history* is supplied by Gradio but unused.  Always returns a
    plain string reply.
    """
    # Guard: the knowledge base failed to load at startup.
    if vectorstore is None:
        return "I apologize, but I can only answer questions based on the Scrum Guide PDF. Please make sure the PDF is loaded properly."
    # Canned reply for "who are you?"-style questions -- no retrieval needed.
    identity_keywords = ["who are you", "what are you", "introduce yourself", "tell me about yourself", "your name"]
    if any(keyword in message.lower() for keyword in identity_keywords):
        return "Hi! I'm Stecu, your Scrum coach. I can help you learn about Scrum by answering questions based on the official Scrum Guide."
    # Canned reply for short greetings/thanks; the length cap avoids matching
    # real questions that merely contain "hi" etc. as a substring.
    greeting_keywords = ["hello", "hi", "hey", "good morning", "good afternoon", "good evening", "thanks", "thank you"]
    if any(keyword in message.lower().strip() for keyword in greeting_keywords) and len(message.strip()) < 25:
        return "Hello! I'm Stecu, your Scrum coach. Feel free to ask me any questions about Scrum and I'll explain them using the official Scrum Guide."
    try:
        # ENHANCED: Increase retrieval count for better context coverage
        retriever = vectorstore.as_retriever(search_kwargs={"k": 8})
        relevant_docs = retriever.invoke(message)
    except Exception as e:
        print(f"Error retrieving documents: {e}")
        return "I'm having trouble accessing the Scrum Guide content. Please try again."
    # De-duplicate retrieved chunks, keying on their first 80 characters.
    contexts = []
    seen_content = set()
    for doc in relevant_docs:
        content = doc.page_content.strip()
        content_key = content[:80].lower()
        # RELAXED: Reduce minimum content length for better duration info capture
        if content_key not in seen_content and len(content) > 15:
            seen_content.add(content_key)
            contexts.append(content)
    # Refuse off-topic questions (and empty retrievals) with the same message.
    if not is_scrum_related(message, contexts):
        return "I can only answer questions about Scrum based on the official Scrum Guide. Please ask me about Scrum concepts, roles, events, artifacts, or processes."
    if not contexts:
        return "I can only answer questions about Scrum based on the official Scrum Guide. Please ask me about Scrum concepts, roles, events, artifacts, or processes."
    # ENHANCED: Use more contexts for better information coverage
    combined_context = "\n\n".join(contexts[:5])
    # Tailor the instruction line to the detected question type.
    intent = get_question_intent(message)
    if intent == "definition":
        instruction_focus = "Provide a clear, concise definition based on the Scrum Guide."
    elif intent == "process":
        instruction_focus = "Explain the key steps or process as described in the Scrum Guide."
    elif intent == "roles":
        instruction_focus = "Explain the responsibilities as defined in the Scrum Guide."
    elif intent == "duration":
        instruction_focus = "Provide the specific duration, time, or length mentioned in the Scrum Guide."
    else:
        instruction_focus = "Answer the question based on the Scrum Guide information."
    # ENHANCED: Improved system prompt for better duration handling
    system_prompt = (
        "You are Stecu, a Scrum coach. You must answer the user's question using ONLY the provided 'Context from Scrum Guide PDF' below. Do not use any external knowledge. "
        "Your answer should be helpful, conversational, and 1-3 sentences long. "
        "If asked about durations, times, or lengths, look carefully in the context for specific time measurements (minutes, hours, days, weeks) and provide them exactly as stated.\n\n"
        "If the provided context does not contain enough information to answer the question, you MUST reply with the single sentence: 'I could not find an answer to your question in the provided text.' Do not add any other information.\n\n"
        f"Instruction: {instruction_focus}\n\n"
        "Context from Scrum Guide PDF:\n"
        f"'{combined_context}'\n\n"
    )
    messages = [
        {"role": "system", "content": system_prompt},
        {"role": "user", "content": message}
    ]
    # Up to 3 attempts: retry both on API errors and on answers that look
    # like they were drawn from outside the provided context.
    for attempt in range(3):
        try:
            completion = client.chat.completions.create(
                model="mistralai/Mistral-7B-Instruct-v0.3",
                messages=messages,
                max_tokens=120,  # Slightly increased for duration explanations
                temperature=0.2,
                top_p=0.9,
                stop=["[/INST]", "</s>", "\n\n", "Human:", "User:", "Assistant:", "Context:"]
            )
            response = completion.choices[0].message.content
            response = clean_response(response)
            # Phrases suggesting the model ignored the context grounding.
            external_knowledge_indicators = [
                "i know that", "generally speaking", "typically", "usually", "in my experience",
                "from what I understand", "as far as I know", "it's common", "normally"
            ]
            response_lower = response.lower()
            if any(indicator in response_lower for indicator in external_knowledge_indicators):
                continue
            # Accept only non-trivial answers; ensure a closing full stop.
            if response and len(response) > 10:
                if not response.endswith('.'):
                    response += "."
                return response
        except Exception as e:
            print(f"Attempt {attempt + 1} failed: {e}")
            continue
    return "I can only provide answers based on the Scrum Guide PDF. Please try asking your question in a different way."
# Initialize the vectorstore
print("\n🎯 Starting knowledge base initialization...")
vectorstore = load_knowledge_base()
if vectorstore is None:
    print("\n❌ Failed to initialize knowledge base.")

    def respond_fallback(message, history):
        # Replaces respond() so the UI still answers instead of crashing.
        return "I apologize, but the Scrum Guide PDF is not available. Please ensure the PDF file is uploaded to this Space."

    respond = respond_fallback
else:
    # BUGFIX: the success banner previously printed unconditionally, so the
    # log claimed success even right after the failure message above.
    print("\nπŸŽ‰ Knowledge base initialization completed successfully!")
print("=" * 60)
print(f"⏰ Initialization completed at: {datetime.now().strftime('%Y-%m-%d %H:%M:%S')}")
print("=" * 60)
print("\n🌐 Step 4: Setting up Gradio interface...")
# Create the Gradio ChatInterface optimized for Hugging Face Spaces
chat_interface = gr.ChatInterface(
fn=respond,
title="πŸƒβ€β™‚οΈ Stecu: Scrum Teaching Chatbot Unit",
description="Hi! I'm Stecu, your Scrum coach. Ask me anything about Scrum and I'll explain it in simple terms based on the official Scrum Guide. Perfect for beginners and experienced practitioners alike!",
type="messages",
examples=[
"What is Scrum?",
"What are the main Scrum roles?",
"How does a Sprint work?",
"What's the difference between Scrum Master and Product Owner?",
"What happens in a Daily Scrum?",
"How do you plan a Sprint?",
"What is a Product Backlog?",
"Why use Scrum?",
"What is a Sprint Review?",
"What is a Sprint Retrospective?",
"How long is Sprint Planning?",
"What is the duration of a Daily Scrum?",
"How long can a Sprint last?"
],
theme=gr.themes.Soft(
primary_hue="blue",
secondary_hue="purple",
neutral_hue="gray",
),
css="""
.gradio-container {
max-width: 1000px;
margin: 0 auto;
font-family: 'Inter', sans-serif;
}
.chat-message {
padding: 16px;
border-radius: 12px;
margin: 12px 0;
box-shadow: 0 2px 8px rgba(0,0,0,0.1);
}
.user-message {
background: linear-gradient(135deg, #667eea 0%, #764ba2 100%);
color: white;
margin-left: 20%;
}
.bot-message {
background: linear-gradient(135deg, #f093fb 0%, #f5576c 100%);
color: white;
margin-right: 20%;
}
.title {
text-align: center;
color: #2c3e50;
font-size: 2.5em;
margin-bottom: 10px;
}
.description {
text-align: center;
color: #34495e;
font-size: 1.2em;
margin-bottom: 30px;
}
""",
chatbot=gr.Chatbot(
height=600,
show_label=False,
container=True,
scale=1,
type="messages"
),
)
print("βœ… Gradio interface configured successfully")
# Launch the interface
if __name__ == "__main__":
    print("\nπŸš€ Step 5: Launching web interface...")
    print("=" * 60)
    print("🌟 Stecu RAG Chatbot is ready!")
    print("=" * 60)
    # Launch configuration optimized for Hugging Face Spaces
    chat_interface.launch(
        server_name="0.0.0.0",  # bind all interfaces so the Space proxy can reach it
        server_port=7860,       # the port Hugging Face Spaces exposes
        share=False,            # no gradio.live tunnel needed inside Spaces
        debug=False,
        show_error=True,        # surface handler exceptions in the UI
        show_api=False,
        quiet=False
    )