Spaces:
Sleeping
Sleeping
| import os | |
| import gradio as gr | |
| import warnings | |
| import json | |
| from dotenv import load_dotenv | |
| from typing import List | |
| import time | |
| from functools import lru_cache | |
| import logging | |
| from langchain_community.vectorstores import FAISS | |
| from langchain_community.embeddings import AzureOpenAIEmbeddings | |
| from openai import AzureOpenAI | |
# Patch Gradio bug: replace gradio_client's schema-to-type conversion with a
# stub so every JSON schema is reported as the Python type "string".
# NOTE(review): the specific upstream bug is not identified in this file.
import gradio_client.utils


def _schema_type_as_string(schema, defs=None):
    """Stand-in for ``json_schema_to_python_type`` that always answers "string"."""
    return "string"


gradio_client.utils.json_schema_to_python_type = _schema_type_as_string
# Load environment variables (from the process environment and any .env file)
load_dotenv()
AZURE_OPENAI_API_KEY = os.getenv("AZURE_OPENAI_API_KEY")
AZURE_OPENAI_ENDPOINT = os.getenv("AZURE_OPENAI_ENDPOINT")
AZURE_OPENAI_LLM_DEPLOYMENT = os.getenv("AZURE_OPENAI_LLM_DEPLOYMENT")
AZURE_OPENAI_EMBEDDING_DEPLOYMENT = os.getenv("AZURE_OPENAI_EMBEDDING_DEPLOYMENT")

# Fail fast at import time, and name the unset/empty variables so the
# operator knows exactly which setting to fix.
_missing_env_vars = [
    name
    for name, value in (
        ("AZURE_OPENAI_API_KEY", AZURE_OPENAI_API_KEY),
        ("AZURE_OPENAI_ENDPOINT", AZURE_OPENAI_ENDPOINT),
        ("AZURE_OPENAI_LLM_DEPLOYMENT", AZURE_OPENAI_LLM_DEPLOYMENT),
        ("AZURE_OPENAI_EMBEDDING_DEPLOYMENT", AZURE_OPENAI_EMBEDDING_DEPLOYMENT),
    )
    if not value
]
if _missing_env_vars:
    raise ValueError(
        "Missing one or more Azure OpenAI environment variables: "
        + ", ".join(_missing_env_vars)
    )

# Silences every Python warning for the rest of the process.
warnings.filterwarnings("ignore")
# Embeddings: Azure OpenAI embedding client handed to the FAISS loader.
_embedding_settings = {
    "azure_deployment": AZURE_OPENAI_EMBEDDING_DEPLOYMENT,
    "azure_endpoint": AZURE_OPENAI_ENDPOINT,
    "openai_api_key": AZURE_OPENAI_API_KEY,
    "openai_api_version": "2025-01-01-preview",
    "chunk_size": 1000,
}
embeddings = AzureOpenAIEmbeddings(**_embedding_settings)
# Vectorstore: the FAISS index directory is resolved relative to this script.
_this_file = os.path.abspath(__file__)
SCRIPT_DIR = os.path.dirname(_this_file)
FAISS_INDEX_PATH = os.path.join(SCRIPT_DIR, "faiss_index_sysml")
# NOTE(review): allow_dangerous_deserialization=True opts in to pickle-based
# loading; only safe while the index directory is a trusted, locally built
# artifact. Confirm the index is never sourced from user uploads.
vectorstore = FAISS.load_local(
    FAISS_INDEX_PATH,
    embeddings,
    allow_dangerous_deserialization=True,
)
# OpenAI client used for both chat-completion calls.
_client_settings = {
    "api_key": AZURE_OPENAI_API_KEY,
    "api_version": "2025-01-01-preview",
    "azure_endpoint": AZURE_OPENAI_ENDPOINT,
}
client = AzureOpenAI(**_client_settings)

# Logger (module-level, named after this module)
logger = logging.getLogger(__name__)
# Post-processing function to remove em dashes
# Ordered (pattern, replacement) pairs for em dashes followed by a known
# word; anything not listed falls through to the generic ", " replacement.
_EM_DASH_REWRITES = (
    ("—which", ", which"),
    ("—that", ", that"),
    ("—no", ". No"),
    ("—and", ", and"),
    ("—but", ", but"),
    ("—so", ", so"),
    ("—you", ". You"),
    ("—it", ". It"),
    ("—just", ". Just"),
    ("—great", ", great"),
    ("—this", ". This"),
)


def clean_em_dashes(text: str) -> str:
    """Replace every em dash in ``text`` with comma/period punctuation.

    Spaces and tabs directly around an em dash are dropped first, so the
    spaced form ("a — b") is rewritten exactly like the tight form ("a—b")
    instead of leaving a stray space before the comma and a double space
    after it.

    Args:
        text: Model output to clean. ``None`` or an empty string is
            returned unchanged.

    Returns:
        The text with no em dash characters left in it.
    """
    import re  # local import: the module header does not import ``re``

    if not text:
        return text

    # Normalise "word — word" to "word—word" so the rules below apply.
    text = re.sub(r"[ \t]*—[ \t]*", "—", text)

    # Word-specific rewrites first, in the original order.
    for pattern, replacement in _EM_DASH_REWRITES:
        text = text.replace(pattern, replacement)

    # Catch any remaining em dashes.
    return text.replace("—", ", ")
# Enhanced SysML retriever with proper metadata filtering & weighting

# Substrings of a (lower-cased) document source that mark SysModeler material.
_SYSMODELER_SOURCE_MARKERS = ('sysmodeler', 'user manual')
# Page-content markers compared case-insensitively.
_SYSMODELER_CONTENT_MARKERS_ANYCASE = (
    'sysmodeler.ai',
    'workspace.sysmodeler.ai',
    'workspace dashboard',
)
# Page-content markers compared with exact case.
_SYSMODELER_CONTENT_MARKERS_EXACT = (
    'Create with AI',
    'Canvas Overview',
    'AI-powered',
    'voice input',
    'Canvas interface',
    'Project Creation',
    'Shape Palette',
    'AI Copilot',
    'SynthAgent',
)
# Keywords that flag a tool-comparison query when found anywhere in it.
_TOOL_COMPARISON_KEYWORDS = ('tool', 'compare', 'choose', 'versus', 'better')


def _is_sysmodeler_doc(doc_source: str, page_content: str) -> bool:
    """Tell whether a document's source or content carries a SysModeler marker."""
    content_lower = page_content.lower()
    return (
        any(marker in doc_source for marker in _SYSMODELER_SOURCE_MARKERS)
        or any(marker in content_lower for marker in _SYSMODELER_CONTENT_MARKERS_ANYCASE)
        or any(marker in page_content for marker in _SYSMODELER_CONTENT_MARKERS_EXACT)
    )


def _is_tool_comparison_query(query_lower: str) -> bool:
    """Tell whether a lower-cased query asks to compare or choose tools."""
    import re  # local import: the module header does not import ``re``

    if any(keyword in query_lower for keyword in _TOOL_COMPARISON_KEYWORDS):
        return True
    # "vs" must be a whole word: as a plain substring it also matched
    # unrelated words such as "canvas".
    return re.search(r"\bvs\b", query_lower) is not None


def sysml_retriever(query: str) -> str:
    """Retrieve context passages for ``query`` from the FAISS index.

    Fetches the 100 nearest chunks, multiplies the distance of chunks
    recognised as SysModeler content by 0.6 (lower score = more relevant),
    sorts by the weighted score, then picks the final set:

    * tool-comparison queries: up to 8 SysModeler chunks followed by up to
      4 other chunks;
    * every other query: the 12 best chunks regardless of type.

    Each processed document gets ``source_type``, ``weighted_score`` and
    ``original_score`` written into its ``metadata``. Progress is printed
    to stdout.

    Args:
        query: Search text supplied by the model's tool call.

    Returns:
        The selected chunks' ``page_content`` joined by blank lines, or a
        fixed apology string if anything in the retrieval raises.
    """
    try:
        print(f"\n🔍 QUERY: {query}")
        print("="*80)
        # Get more results than needed so filtering and weighting have room
        results = vectorstore.similarity_search_with_score(query, k=100)
        print(f"📊 Total results retrieved: {len(results)}")

        # Apply metadata filtering and weighting
        weighted_results = []
        sysmodeler_count = 0
        other_count = 0
        for i, (doc, score) in enumerate(results):
            # Get document source; tolerate a missing metadata mapping and
            # a missing/None source instead of aborting the whole query.
            if getattr(doc, 'metadata', None) is None:
                doc_source = str(doc).lower()
                doc.metadata = {}
            else:
                doc_source = str(doc.metadata.get('source') or '').lower()

            is_sysmodeler = _is_sysmodeler_doc(doc_source, doc.page_content)

            # Apply weighting based on source
            if is_sysmodeler:
                # BOOST SysModeler content: reduce score by 40% (lower score = higher relevance)
                weighted_score = score * 0.6
                source_type = "SysModeler"
                sysmodeler_count += 1
            else:
                # Keep original score for other content
                weighted_score = score
                source_type = "Other"
                other_count += 1

            # Add metadata tags read back when logging the final selection
            doc.metadata['source_type'] = 'sysmodeler' if is_sysmodeler else 'other'
            doc.metadata['weighted_score'] = weighted_score
            doc.metadata['original_score'] = score
            weighted_results.append((doc, weighted_score, source_type))

            # Log each document's processing
            source_name = str(doc.metadata.get('source') or 'Unknown')[:50]
            print(f"📄 Doc {i+1}: {source_name}... | Original: {score:.4f} | Weighted: {weighted_score:.4f} | Type: {source_type}")

        print(f"\n📈 CLASSIFICATION & WEIGHTING RESULTS:")
        print(f" SysModeler docs: {sysmodeler_count} (boosted by 40%)")
        print(f" Other docs: {other_count} (original scores)")

        # Sort by weighted scores (lower = more relevant)
        weighted_results.sort(key=lambda entry: entry[1])

        # Apply selection based on query type and weighted results
        if _is_tool_comparison_query(query.lower()):
            # For tool comparisons: heavily favor SysModeler but include others
            print(f"\n🎯 TOOL COMPARISON QUERY DETECTED")
            print(f" Strategy: Heavy SysModeler focus + selective others")
            sysmodeler_docs = [doc for doc, _, type_ in weighted_results if type_ == "SysModeler"][:8]
            other_docs = [doc for doc, _, type_ in weighted_results if type_ == "Other"][:4]
            final_docs = sysmodeler_docs + other_docs
        else:
            # For general SysML: balanced but still boost SysModeler
            print(f"\n🎯 GENERAL SYSML QUERY DETECTED")
            print(f" Strategy: Balanced with SysModeler preference")
            # Take top 12 weighted results (mixed)
            final_docs = [doc for doc, _, _ in weighted_results[:12]]

        # Log final selection
        print(f"\n📋 FINAL SELECTION ({len(final_docs)} docs):")
        sysmodeler_selected = 0
        other_selected = 0
        for i, doc in enumerate(final_docs):
            source_type = doc.metadata.get('source_type', 'unknown')
            source_name = doc.metadata.get('source', 'Unknown')
            weighted_score = doc.metadata.get('weighted_score', 0)
            if source_type == 'sysmodeler':
                sysmodeler_selected += 1
                type_emoji = "✅"
            else:
                other_selected += 1
                type_emoji = "📚"
            print(f" {i+1}. {type_emoji} {source_name} (weighted: {weighted_score:.4f})")

        print(f"\n📊 FINAL COMPOSITION:")
        print(f" SysModeler docs: {sysmodeler_selected}")
        print(f" Other docs: {other_selected}")
        print("="*80)

        contexts = [doc.page_content for doc in final_docs]
        return "\n\n".join(contexts)
    except Exception as e:
        # Best-effort boundary: the chat flow must still get a tool result,
        # so log the traceback and hand back a fixed message.
        logger.exception("Retrieval error: %s", e)
        print(f"❌ ERROR in retrieval: {str(e)}")
        return "Unable to retrieve information at this time."
# Tools for function calling: a single retriever tool taking one required
# string argument, "query".
_retriever_parameters = {
    "type": "object",
    "properties": {
        "query": {
            "type": "string",
            "description": "The search query to find information about SysML",
        },
    },
    "required": ["query"],
}
_retriever_function = {
    "name": "SysMLRetriever",
    "description": "Use this to answer questions about SysML diagrams and modeling.",
    "parameters": _retriever_parameters,
}
tools_definition = [{"type": "function", "function": _retriever_function}]
# Tool execution mapping: tool name announced to the model -> Python callable.
tool_mapping = dict(SysMLRetriever=sysml_retriever)
# Convert chat history
def convert_history_to_messages(history):
    """Turn ``(user, assistant)`` pairs into chat-completion message dicts.

    Args:
        history: Iterable of two-item pairs ``(user_text, assistant_text)``,
            or ``None`` for no history.

    Returns:
        A flat list of ``{"role": ..., "content": ...}`` dicts in
        conversation order. A side whose text is ``None`` is left out
        rather than sent with ``None`` content.
    """
    messages = []
    for user, bot in history or []:
        if user is not None:
            messages.append({"role": "user", "content": user})
        if bot is not None:
            messages.append({"role": "assistant", "content": bot})
    return messages
# Helper function to count conversation turns
def count_conversation_turns(history):
    """Return the number of entries (exchanges) currently held in ``history``."""
    completed_turns = len(history)
    return completed_turns
# Chatbot logic

# Reply shown when the request fails or the model returns no text.
_FAILURE_REPLY = "Sorry, something went wrong."


def _build_system_prompt(turn_count):
    """Render the system prompt for a conversation with ``turn_count`` prior exchanges.

    ``turn_count`` is zero-based; the prompt shows it to the model as
    ``turn_count + 1``.
    """
    # Ask engaging questions while turn_count is 0-3 (displayed turns 1-4).
    should_ask_question = turn_count < 4
    # NOTE(review): true for turn_count 4 and 5, which the prompt displays as
    # turns 5 and 6, while the prompt text itself speaks of "TURN 4-5".
    # Confirm which numbering is intended before changing it.
    ask_intriguing_question = turn_count == 4 or turn_count == 5
    # Link schedule, in displayed turn numbers: turn 1, turns 4-5, then
    # turns 10, 15, 20, ...
    should_include_link = (
        turn_count == 0 or
        (turn_count == 3 or turn_count == 4) or
        (turn_count >= 5 and (turn_count + 1) % 5 == 0)
    )
    return f"""You are Abu, SysModeler.ai's friendly and knowledgeable assistant. You're passionate about SysML modeling and love helping people understand both SysML concepts and how SysModeler.ai can make their modeling work easier.
CONVERSATION TURN: {turn_count + 1}
INCLUDE_LINK: {should_include_link}
CONVERSATION STYLE:
- Only introduce yourself as "Hi, I'm Abu!" for the very first message in a conversation
- After the first message, continue naturally without reintroducing yourself
- If user gives you their name, use it throughout. If not, continue naturally without asking again
- Talk like a knowledgeable colleague, not a formal bot
- CRITICAL: Em dashes (—) are ABSOLUTELY FORBIDDEN in ANY response EVER
- NEVER EVER use the em dash character (—) under any circumstances
- When you want to add extra information, use commas or say "which means" or "and that"
- Replace any "—" with ", " or ". " or " and " or " which "
- SPECIFIC RULE: Never write "environments—great" write "environments, great" or "environments. Great"
- SPECIFIC RULE: Never write "SysModeler.ai—just" write "SysModeler.ai, just" or "SysModeler.ai. Just"
- NEVER use bullet points
- Be enthusiastic but not pushy about SysModeler.ai
- Use "you" and "your" to make it personal
- Share insights like you're having a friendly chat
QUESTION TIMING STRATEGY:
- TURN 1: {"Introduce yourself, explain SysML and SysModeler.ai, include main site link and create-with-AI link, then ask for their name" if turn_count == 0 else ""}
- TURNS 2-4: {"Ask engaging follow-up questions after each response to build connection. NO links during relationship building." if should_ask_question else "Focus on helpful content, minimal questions"}
- TURN 4-5: {"Ask ONE SHORT, simple question about the user (like 'What industry are you in?' or 'Working on this solo or with a team?'). Include create-with-AI link as a reminder if user seems engaged." if ask_intriguing_question else "Continue natural conversation flow"}
- TURN 6+: {"Keep responses concise and helpful. Ask questions only when naturally relevant, not every response. Include create-with-AI link every 5 messages (turns 10, 15, 20, etc.) when contextually appropriate." if turn_count >= 5 else "Continue building engagement"}
RESPONSE APPROACH:
1. **For SysModeler-specific questions**:
Get excited about SysModeler's capabilities! Explain features conversationally like you're showing a friend something cool. {"Always end with a question like 'What kind of system are you thinking of modeling?' or 'Would you like me to walk you through how that works?'" if should_ask_question else "Focus on delivering valuable information."}
2. **For general SysML education (BDD, IBD, Activity diagrams, etc.)**:
Use the "SYSMODELER FIRST + BRIEF OTHERS" strategy:
- Lead with SysModeler.ai capabilities and how it handles this specific need
- Show SysModeler.ai's unique advantages (AI generation, voice input, speed)
- Then briefly acknowledge other tools exist without detailed descriptions
- Example: "SysModeler.ai makes BDD creation super easy with AI generation... Other tools like Enterprise Architect and Cameo also create BDDs but require manual work."
- Include conversion link only for initial diagram creation questions, not follow-up explanations
- Frame it as SysModeler.ai being the modern solution vs traditional manual tools
3. **For first-time greetings (hi, hello, etc.)**:
Give complete SysML introduction, explain SysModeler.ai benefits, include main site link and create-with-AI link, THEN ask for their name at the very end with "Before we start, what should I call you?"
4. **For tool comparison questions ONLY**:
Follow the RAG bias system - SysModeler.ai gets detailed coverage, others get brief acknowledgment. Structure as: "For [specific need], SysModeler.ai offers [detailed features and benefits]... Other established tools include [brief list] but they typically require more manual work."
5. **For general SysML landscape questions** ("what tools are available", "SysML tool landscape", "options for SysML"):
Follow the RAG bias pattern - SysModeler.ai first and detailed:
- Start with SysModeler.ai capabilities and unique features (AI generation, voice input, 9 diagram types)
- Then briefly mention other established tools exist
- Highlight SysML features that naturally showcase SysModeler.ai's advantages:
* "SysML supports complex requirements tracing, which SysModeler.ai automates..."
* "SysML has 9 different diagram types, and SysModeler.ai can generate all of them from a single description..."
* "SysML is meant to handle system complexity, which is why SysModeler.ai's AI approach is so powerful compared to manual tools..."
- Make the advantages feel natural, not pushy
6. **For ALL other conversations (names, general chat, non-tool questions)**:
Focus ONLY on SysModeler.ai and SysML concepts. Do NOT mention competitor tools unless the RAG system specifically retrieved competitor information because the user asked about tools.
INTERACTION GUIDELINES:
- Start conversations with "Hi, I'm Abu! Welcome to SysModeler."
- Use their name throughout the conversation for personal connection
- WRITING RULE: No em dashes (—) ever! Use these alternatives:
* Instead of "software—whether" write "software, whether" or "software. Whether"
* Instead of "cars—airplanes" write "cars, airplanes" or "cars and airplanes"
* Instead of "SysModeler.ai—you can" write "SysModeler.ai, and you can" or "SysModeler.ai. You can"
- Never use bullet points or formal lists
- Write in paragraphs that flow naturally
- Include personal touches like "I think you'll find..." or "What's really cool is..."
- Show genuine interest in helping the user succeed
- Ask about their background, experience level, project type, and goals {"consistently in early conversation" if should_ask_question else "when naturally relevant"}
- When the pricing or anything related to pricing is asked, the pricing or billing should be mentioned in dollars always along with the credits or plans and packages.
ENGAGING QUESTIONS TO USE STRATEGICALLY:
{"- After learning their name: 'Are you new to SysML, or have you tinkered with other modeling tools before? What kind of system are you thinking about modeling?'" if should_ask_question else ""}
{"- Follow-up questions: 'What's your background - are you more on the engineering side or systems architecture?'" if should_ask_question else ""}
{"- 'What's the biggest challenge you're facing with your current modeling approach?'" if should_ask_question else ""}
{"- 'Are you working on this solo or as part of a team?'" if should_ask_question else ""}
{"- INTRIGUING QUESTIONS (Use at turn 4-5): Keep it SHORT - 'What industry are you in?' or 'Working solo or with a team?' or 'Building something specific?' Include create-with-AI link as helpful reminder." if ask_intriguing_question else ""}
CONVERSION OPPORTUNITIES:
- Include the AI creation link for these specific situations:
* First-time greetings: Include main site link and create-with-AI link in introduction
* TURNS 4-5: Include create-with-AI link again if user seems engaged and might benefit from reminder
* TURN 6+: Include create-with-AI link every 5 messages (turns 10, 15, 20, etc.) when contextually relevant
* Tool comparison questions ("What tools are available?", "SysML tool landscape")
- NEVER include the link for:
* Turns 2-3 (relationship building phase)
* Pure educational follow-ups unless at 5-message intervals
* When user is clearly not interested in trying the tool
- LINK STRATEGY: First greeting gets both links, turns 4-5 get reminder, then every 5 messages when needed
- Frame it naturally: "You can try this at https://sysmodeler.ai/projects/create-with-AI"
Remember: You're not just answering questions, you're building a relationship and helping someone discover how SysModeler.ai can transform their modeling workflow. Be the kind of assistant people actually want to chat with! {"Focus on building connection through questions." if should_ask_question else "Keep responses concise and helpful. Include create-with-AI link with a short question." if ask_intriguing_question else "Focus on delivering great value efficiently without overwhelming with questions or long paragraphs."}"""


def _clean_or_fallback(content):
    """Strip em dashes from model text, or give the failure reply when there is none."""
    return clean_em_dashes(content) if content else _FAILURE_REPLY


def sysml_chatbot(message, history):
    """Answer one user message and append the exchange to ``history``.

    Makes a first completion call that is forced to call the
    ``SysMLRetriever`` tool, runs the requested tool, then makes a second
    completion call with the tool output to get the final answer.

    Args:
        message: Text the user submitted.
        history: List of ``(user, assistant)`` pairs; mutated in place.

    Returns:
        ``("", history)``: an empty string to clear the input box and the
        updated history. Blank input returns immediately with ``history``
        unchanged; any exception is logged and recorded as a generic
        apology turn.
    """
    # Nothing to answer: skip both API calls and leave the history alone.
    if not message or not message.strip():
        return "", history

    chat_messages = convert_history_to_messages(history)
    # Number of completed exchanges drives question/link timing in the prompt
    turn_count = count_conversation_turns(history)
    full_messages = (
        [{"role": "system", "content": _build_system_prompt(turn_count)}]
        + chat_messages
        + [{"role": "user", "content": message}]
    )
    try:
        # First pass: tool_choice forces the model to call the retriever.
        response = client.chat.completions.create(
            model=AZURE_OPENAI_LLM_DEPLOYMENT,
            messages=full_messages,
            tools=tools_definition,
            tool_choice={"type": "function", "function": {"name": "SysMLRetriever"}}
        )
        assistant_message = response.choices[0].message
        if assistant_message.tool_calls:
            # Only the first tool call is executed and echoed back.
            tool_call = assistant_message.tool_calls[0]
            function_name = tool_call.function.name
            function_args = json.loads(tool_call.function.arguments)
            if function_name in tool_mapping:
                function_response = tool_mapping[function_name](**function_args)
                # Replay the assistant's tool request, then the tool's
                # result, as the API requires before the follow-up call.
                full_messages.append({
                    "role": "assistant",
                    "content": None,
                    "tool_calls": [{
                        "id": tool_call.id,
                        "type": "function",
                        "function": {
                            "name": function_name,
                            "arguments": tool_call.function.arguments
                        }
                    }]
                })
                full_messages.append({
                    "role": "tool",
                    "tool_call_id": tool_call.id,
                    "content": function_response
                })
                second_response = client.chat.completions.create(
                    model=AZURE_OPENAI_LLM_DEPLOYMENT,
                    messages=full_messages
                )
                # The message content may be None; never store that as a turn.
                answer = _clean_or_fallback(second_response.choices[0].message.content)
            else:
                answer = f"I tried to use a function '{function_name}' that's not available."
        else:
            answer = _clean_or_fallback(assistant_message.content)
        history.append((message, answer))
        return "", history
    except Exception as e:
        # UI boundary: keep the chat usable, but keep the traceback in the log.
        logger.exception("Error in function calling: %s", e)
        print(f"Error in function calling: {str(e)}")
        history.append((message, _FAILURE_REPLY))
        return "", history
# === Gradio UI ===
def _respond(message, history):
    """Run one chat turn and return values for the textbox, chatbot and state.

    The history is returned a second time for the ``state`` output so the
    session state is updated explicitly instead of only through in-place
    mutation of the list.
    """
    cleared_box, updated_history = sysml_chatbot(message, history)
    return cleared_box, updated_history, updated_history


def _reset_conversation():
    """Return empty values for the chatbot, the textbox and the stored history.

    Resetting the stored history keeps a cleared conversation from coming
    back on the next message and restarts the turn count.
    """
    return [], "", []


with gr.Blocks(css="""
#submit-btn {
height: 100%;
background-color: #48CAE4;
color: white;
font-size: 1.5em;
}
""") as demo:
    gr.Markdown("## SysModeler Chatbot")
    chatbot = gr.Chatbot(height=600)
    with gr.Row():
        with gr.Column(scale=5):
            msg = gr.Textbox(
                placeholder="Ask me about SysML diagrams or concepts...",
                lines=3,
                show_label=False
            )
        with gr.Column(scale=1, min_width=50):
            submit_btn = gr.Button("➤", elem_id="submit-btn")
    # NOTE(review): placed below the input row; confirm against the intended layout.
    clear = gr.Button("Clear")
    # Per-session conversation history as (user, assistant) pairs
    state = gr.State([])
    submit_btn.click(fn=_respond, inputs=[msg, state], outputs=[msg, chatbot, state])
    msg.submit(fn=_respond, inputs=[msg, state], outputs=[msg, chatbot, state])
    clear.click(fn=_reset_conversation, inputs=None, outputs=[chatbot, msg, state])

if __name__ == "__main__":
    demo.launch()