import os from huggingface_hub import InferenceClient import gradio as gr from langchain_community.document_loaders import PyPDFLoader from langchain.text_splitter import RecursiveCharacterTextSplitter from langchain_community.vectorstores import Chroma from langchain_huggingface import HuggingFaceEmbeddings import time from datetime import datetime # Load environment variables print("šŸš€ Starting Stecu RAG Chatbot...") print("=" * 60) print(f"ā° Initialization started at: {datetime.now().strftime('%Y-%m-%d %H:%M:%S')}") print("=" * 60) print("\nšŸ“‹ Step 1: Loading environment variables...") # Get HF_TOKEN from environment (Hugging Face Spaces automatically provides this) HF_TOKEN = os.getenv("HF_TOKEN") if HF_TOKEN: print("āœ… Hugging Face token found in environment") else: print("āŒ Warning: HF_TOKEN not found in environment variables") print("\nšŸ¤– Step 2: Initializing Hugging Face InferenceClient...") try: client = InferenceClient(token=HF_TOKEN) print("āœ… InferenceClient initialized successfully") print(f" Using model: mistralai/Mistral-7B-Instruct-v0.3") except Exception as e: print(f"āŒ Error initializing InferenceClient: {e}") # Load and process the Scrum Guide PDF def load_knowledge_base(): print("\nšŸ“š Step 3: Loading and processing Scrum Guide PDF...") # Check if PDF exists pdf_path = "Scrum Guide.pdf" if not os.path.exists(pdf_path): print(f"āŒ Error: '{pdf_path}' not found in current directory") print(" Please make sure the Scrum Guide PDF is in the same folder as this script") return None print(f"āœ… Found PDF file: {pdf_path}") print(f" File size: {os.path.getsize(pdf_path) / 1024:.1f} KB") # Load the PDF print("\nšŸ“– Step 3a: Loading PDF content...") start_time = time.time() try: loader = PyPDFLoader(pdf_path) documents = loader.load() load_time = time.time() - start_time print(f"āœ… PDF loaded successfully in {load_time:.2f} seconds") print(f" Total pages: {len(documents)}") print(f" Total characters: {sum(len(doc.page_content) for doc in documents):,}") except Exception as e: print(f"āŒ Error loading PDF: {e}") return None # Split documents into chunks print("\nāœ‚ļø Step 3b: Splitting documents into chunks...") start_time = time.time() text_splitter = RecursiveCharacterTextSplitter( chunk_size=600, # Smaller chunks for focused retrieval chunk_overlap=80, # Minimal overlap to reduce duplication separators=["\n\n", "\n", ". ", "! ", "? ", ", ", " ", ""] # Better splitting ) chunks = text_splitter.split_documents(documents) chunk_time = time.time() - start_time print(f"āœ… Document chunking completed in {chunk_time:.2f} seconds") print(f" Total chunks created: {len(chunks)}") print(f" Average chunk size: {sum(len(chunk.page_content) for chunk in chunks) // len(chunks)} characters") print(f" Chunk size range: {min(len(chunk.page_content) for chunk in chunks)} - {max(len(chunk.page_content) for chunk in chunks)} characters") # Create embeddings and store in Chroma vector database print("\n🧠 Step 3c: Creating embeddings and vector database...") print(" This may take a few minutes depending on your hardware...") start_time = time.time() try: print(" šŸ“„ Downloading embedding model: sentence-transformers/all-MiniLM-L6-v2") embedding_model = HuggingFaceEmbeddings(model_name="sentence-transformers/all-MiniLM-L6-v2") print(" āœ… Embedding model loaded successfully") print(" šŸ”„ Generating embeddings for all chunks...") vectorstore = Chroma.from_documents(chunks, embedding_model) embedding_time = time.time() - start_time print(f"āœ… Vector database created successfully in {embedding_time:.2f} seconds") print(f" Vector database contains {len(chunks)} document embeddings") print(f" Embedding model dimensions: 384 (MiniLM-L6-v2)") except Exception as e: print(f"āŒ Error creating embeddings: {e}") return None return vectorstore def clean_response(response): """Clean up response artifacts and formatting issues""" artifacts = ["[/ASS]", "", "[/INST]", "[/", "Human:", "User:", "Assistant:", "Context:", "Instructions:", "Stecu:", "In Scrum,", "During the Sprint", "Here's", "Here is"] for artifact in artifacts: response = response.replace(artifact, "") if ":" in response[:20]: parts = response.split(":", 1) if len(parts) > 1: response = parts[1].strip() response = " ".join(response.split()) response = response.replace("[", "").replace("]", "") if response.startswith('"') and response.endswith('"'): response = response[1:-1] unwanted_starts = ["In Scrum,", "During the Sprint,", "The answer is", "Well,", "So,", "Basically,"] for start in unwanted_starts: if response.startswith(start): response = response[len(start):].strip() if response and len(response) > 10: incomplete_patterns = [" and", " or", " but", " which", " that", " where", " when", " who", " what", " how"] for pattern in incomplete_patterns: if response.endswith(pattern): response = response[:-len(pattern)].strip() break return response.strip() def get_question_intent(message): """Determine the type of question to provide appropriate response style""" message_lower = message.lower() if any(word in message_lower for word in ["what is", "define", "explain", "tell me about"]): return "definition" elif any(word in message_lower for word in ["how", "how to", "process", "steps"]): return "process" elif any(word in message_lower for word in ["why", "benefit", "advantage", "purpose"]): return "rationale" elif any(word in message_lower for word in ["who", "role", "responsibility"]): return "roles" # NEW: Add duration intent detection elif any(word in message_lower for word in ["how long", "duration", "time", "minutes", "hours", "days", "weeks", "length"]): return "duration" else: return "general" def is_scrum_related(message, contexts): """Check if the question is related to Scrum based on context relevance""" if not contexts: return False # ENHANCED: Added duration-related keywords scrum_keywords = ["scrum", "sprint", "product owner", "scrum master", "developer", "backlog", "retrospective", "review", "daily", "planning", "duration", "time", "minutes", "hours", "weeks"] message_lower = message.lower() if any(keyword in message_lower for keyword in scrum_keywords): return True for context in contexts: if len(context) > 50 and any(keyword in context.lower() for keyword in scrum_keywords): return True return False def respond(message, history): if vectorstore is None: return "I apologize, but I can only answer questions based on the Scrum Guide PDF. Please make sure the PDF is loaded properly." identity_keywords = ["who are you", "what are you", "introduce yourself", "tell me about yourself", "your name"] if any(keyword in message.lower() for keyword in identity_keywords): return "Hi! I'm Stecu, your Scrum coach. I can help you learn about Scrum by answering questions based on the official Scrum Guide." greeting_keywords = ["hello", "hi", "hey", "good morning", "good afternoon", "good evening", "thanks", "thank you"] if any(keyword in message.lower().strip() for keyword in greeting_keywords) and len(message.strip()) < 25: return "Hello! I'm Stecu, your Scrum coach. Feel free to ask me any questions about Scrum and I'll explain them using the official Scrum Guide." try: # ENHANCED: Increase retrieval count for better context coverage retriever = vectorstore.as_retriever(search_kwargs={"k": 8}) relevant_docs = retriever.invoke(message) except Exception as e: print(f"Error retrieving documents: {e}") return "I'm having trouble accessing the Scrum Guide content. Please try again." contexts = [] seen_content = set() for doc in relevant_docs: content = doc.page_content.strip() content_key = content[:80].lower() # RELAXED: Reduce minimum content length for better duration info capture if content_key not in seen_content and len(content) > 15: seen_content.add(content_key) contexts.append(content) if not is_scrum_related(message, contexts): return "I can only answer questions about Scrum based on the official Scrum Guide. Please ask me about Scrum concepts, roles, events, artifacts, or processes." if not contexts: return "I can only answer questions about Scrum based on the official Scrum Guide. Please ask me about Scrum concepts, roles, events, artifacts, or processes." # ENHANCED: Use more contexts for better information coverage combined_context = "\n\n".join(contexts[:5]) intent = get_question_intent(message) if intent == "definition": instruction_focus = "Provide a clear, concise definition based on the Scrum Guide." elif intent == "process": instruction_focus = "Explain the key steps or process as described in the Scrum Guide." elif intent == "roles": instruction_focus = "Explain the responsibilities as defined in the Scrum Guide." elif intent == "duration": instruction_focus = "Provide the specific duration, time, or length mentioned in the Scrum Guide." else: instruction_focus = "Answer the question based on the Scrum Guide information." # ENHANCED: Improved system prompt for better duration handling system_prompt = ( "You are Stecu, a Scrum coach. You must answer the user's question using ONLY the provided 'Context from Scrum Guide PDF' below. Do not use any external knowledge. " "Your answer should be helpful, conversational, and 1-3 sentences long. " "If asked about durations, times, or lengths, look carefully in the context for specific time measurements (minutes, hours, days, weeks) and provide them exactly as stated.\n\n" "If the provided context does not contain enough information to answer the question, you MUST reply with the single sentence: 'I could not find an answer to your question in the provided text.' Do not add any other information.\n\n" f"Instruction: {instruction_focus}\n\n" "Context from Scrum Guide PDF:\n" f"'{combined_context}'\n\n" ) messages = [ {"role": "system", "content": system_prompt}, {"role": "user", "content": message} ] for attempt in range(3): try: completion = client.chat.completions.create( model="mistralai/Mistral-7B-Instruct-v0.3", messages=messages, max_tokens=120, # Slightly increased for duration explanations temperature=0.2, top_p=0.9, stop=["[/INST]", "", "\n\n", "Human:", "User:", "Assistant:", "Context:"] ) response = completion.choices[0].message.content response = clean_response(response) external_knowledge_indicators = [ "i know that", "generally speaking", "typically", "usually", "in my experience", "from what I understand", "as far as I know", "it's common", "normally" ] response_lower = response.lower() if any(indicator in response_lower for indicator in external_knowledge_indicators): continue if response and len(response) > 10: if not response.endswith('.'): response += "." return response except Exception as e: print(f"Attempt {attempt + 1} failed: {e}") continue return "I can only provide answers based on the Scrum Guide PDF. Please try asking your question in a different way." # Initialize the vectorstore print("\nšŸŽÆ Starting knowledge base initialization...") vectorstore = load_knowledge_base() if vectorstore is None: print("\nāŒ Failed to initialize knowledge base.") def respond_fallback(message, history): return "I apologize, but the Scrum Guide PDF is not available. Please ensure the PDF file is uploaded to this Space." respond = respond_fallback print("\nšŸŽ‰ Knowledge base initialization completed successfully!") print("=" * 60) print(f"ā° Initialization completed at: {datetime.now().strftime('%Y-%m-%d %H:%M:%S')}") print("=" * 60) print("\n🌐 Step 4: Setting up Gradio interface...") # Create the Gradio ChatInterface optimized for Hugging Face Spaces chat_interface = gr.ChatInterface( fn=respond, title="šŸƒā€ā™‚ļø Stecu: Scrum Teaching Chatbot Unit", description="Hi! I'm Stecu, your Scrum coach. Ask me anything about Scrum and I'll explain it in simple terms based on the official Scrum Guide. Perfect for beginners and experienced practitioners alike!", type="messages", examples=[ "What is Scrum?", "What are the main Scrum roles?", "How does a Sprint work?", "What's the difference between Scrum Master and Product Owner?", "What happens in a Daily Scrum?", "How do you plan a Sprint?", "What is a Product Backlog?", "Why use Scrum?", "What is a Sprint Review?", "What is a Sprint Retrospective?", "How long is Sprint Planning?", "What is the duration of a Daily Scrum?", "How long can a Sprint last?" ], theme=gr.themes.Soft( primary_hue="blue", secondary_hue="purple", neutral_hue="gray", ), css=""" .gradio-container { max-width: 1000px; margin: 0 auto; font-family: 'Inter', sans-serif; } .chat-message { padding: 16px; border-radius: 12px; margin: 12px 0; box-shadow: 0 2px 8px rgba(0,0,0,0.1); } .user-message { background: linear-gradient(135deg, #667eea 0%, #764ba2 100%); color: white; margin-left: 20%; } .bot-message { background: linear-gradient(135deg, #f093fb 0%, #f5576c 100%); color: white; margin-right: 20%; } .title { text-align: center; color: #2c3e50; font-size: 2.5em; margin-bottom: 10px; } .description { text-align: center; color: #34495e; font-size: 1.2em; margin-bottom: 30px; } """, chatbot=gr.Chatbot( height=600, show_label=False, container=True, scale=1, type="messages" ), ) print("āœ… Gradio interface configured successfully") # Launch the interface if __name__ == "__main__": print("\nšŸš€ Step 5: Launching web interface...") print("=" * 60) print("🌟 Stecu RAG Chatbot is ready!") print("=" * 60) # Launch configuration optimized for Hugging Face Spaces chat_interface.launch( server_name="0.0.0.0", server_port=7860, share=False, debug=False, show_error=True, show_api=False, quiet=False )