Upload app.py with huggingface_hub
app.py
ADDED
@@ -0,0 +1,210 @@
+import gradio as gr
+import torch
+from transformers import AutoTokenizer, AutoModelForCausalLM, pipeline
+import json
+import warnings
+from typing import List, Dict, Any
+
+# Suppress warnings for cleaner output
+warnings.filterwarnings("ignore")
+
+# Load model and tokenizer with error handling
+model_name = "microsoft/Phi-3.5-MoE-instruct"
+
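+# Note: Phi-3.5-MoE is a mixture-of-experts model with roughly 42B total
+# parameters (about 6.6B active per token), so a full load generally exceeds
+# the memory of a free CPU Space; the except branch below falls back to a
+# demo-only mode in that case.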
+try:
+    print("Loading tokenizer...")
+    tokenizer = AutoTokenizer.from_pretrained(model_name, trust_remote_code=True)
+
+    print("Loading model...")
+    # Use settings that work across Hugging Face Spaces hardware
+    model = AutoModelForCausalLM.from_pretrained(
+        model_name,
+        torch_dtype=torch.float16,  # float16 instead of bfloat16 for wider hardware support
+        device_map="auto",
+        trust_remote_code=True,
+        low_cpu_mem_usage=True
+    )
+
+    print("Creating pipeline...")
+    # Create pipeline
+    pipe = pipeline(
+        "text-generation",
+        model=model,
+        tokenizer=tokenizer,
+        torch_dtype=torch.float16,
+        device_map="auto",
+        trust_remote_code=True
+    )
+
+    print("Model loaded successfully!")
+
+except Exception as e:
+    print(f"Error loading model: {e}")
+    print("This is likely due to missing dependencies (einops, flash_attn) or memory constraints.")
+    print("The model will run in fallback mode.")
+    # Fall back to stubs so the UI still works for demo purposes
+    pipe = None
+    tokenizer = None
+
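+# Note: the "expert" routing below is an application-level prompt persona
+# chosen by keyword matching; it is separate from the token-level expert
+# routing the MoE architecture performs internally during generation.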
+def classify_query_type(query: str) -> str:
+    """Classify query to determine expert specialization"""
+    query_lower = query.lower()
+
+    expert_keywords = {
+        "Code": ["programming", "software", "development", "coding", "algorithm", "python", "javascript", "java", "function", "code"],
+        "Math": ["mathematics", "calculation", "equation", "formula", "statistics", "derivative", "integral", "algebra", "calculus", "math", "solve", "calculate"],
+        "Reasoning": ["logic", "analysis", "reasoning", "problem-solving", "critical", "explain", "why", "how", "because"],
+        "Multilingual": ["translation", "language", "multilingual", "localization", "translate", "spanish", "french", "german"],
+        "General": ["general", "conversation", "assistance", "help", "hello", "hi", "what", "who", "when", "where"]
+    }
+
+    scores = {}
+    for expert, keywords in expert_keywords.items():
+        score = sum(1 for keyword in keywords if keyword in query_lower)
+        scores[expert] = score
+
+    if scores:
+        best_expert = max(scores.items(), key=lambda x: x[1])[0]
+        if scores[best_expert] > 0:
+            return best_expert
+
+    return "General"
+
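+# Illustration: classify_query_type("Solve this equation") hits the "solve"
+# and "equation" keywords above and returns "Math"; a query with no keyword
+# hits falls through to "General".
+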
+def generate_fallback_response(query: str, expert_type: str) -> str:
+    """Generate a fallback response when the model is not available"""
+    fallback_responses = {
+        "Code": f"I'm a Code Expert, but the Phi-3.5-MoE model is currently unavailable. For your question about '{query}', I would typically provide detailed code examples and programming guidance. Please try again later when the model is loaded.",
+        "Math": f"I'm a Math Expert, but the Phi-3.5-MoE model is currently unavailable. For your question about '{query}', I would typically solve mathematical problems step-by-step. Please try again later when the model is loaded.",
+        "Reasoning": f"I'm a Reasoning Expert, but the Phi-3.5-MoE model is currently unavailable. For your question about '{query}', I would typically provide logical analysis and systematic problem-solving. Please try again later when the model is loaded.",
+        "Multilingual": f"I'm a Multilingual Expert, but the Phi-3.5-MoE model is currently unavailable. For your question about '{query}', I would typically help with translations and language learning. Please try again later when the model is loaded.",
+        "General": f"I'm a General Expert, but the Phi-3.5-MoE model is currently unavailable. For your question about '{query}', I would typically provide helpful and informative responses. Please try again later when the model is loaded."
+    }
+    return fallback_responses.get(expert_type, fallback_responses["General"])
+
+def generate_response(query: str, max_tokens: int = 500, temperature: float = 0.7) -> str:
+    """Generate response using Phi-3.5-MoE"""
+    try:
+        # Classify query type
+        expert_type = classify_query_type(query)
+
+        if pipe is None or tokenizer is None:
+            return f"**Expert Type:** {expert_type}\n\n**Response:**\n{generate_fallback_response(query, expert_type)}"
+
+        # Create system message based on expert type
+        system_messages = {
+            "Code": "You are an expert software engineer and programming assistant. Provide clear, well-commented code examples and explain programming concepts thoroughly.",
+            "Math": "You are a mathematics expert. Solve problems step-by-step, show your work, and explain mathematical concepts clearly.",
+            "Reasoning": "You are a logical reasoning expert. Break down complex problems, analyze them systematically, and provide clear explanations.",
+            "Multilingual": "You are a multilingual expert. Help with translations, language learning, and cross-cultural communication.",
+            "General": "You are a helpful AI assistant. Provide accurate, helpful, and informative responses to user questions."
+        }
+
+        system_message = system_messages.get(expert_type, system_messages["General"])
+
+        # Format messages; passing a list of role/content dicts makes the
+        # pipeline apply the model's chat template automatically
+        messages = [
+            {"role": "system", "content": system_message},
+            {"role": "user", "content": query}
+        ]
+
+        # Generate response
+        response = pipe(
+            messages,
+            max_new_tokens=max_tokens,
+            temperature=temperature,
+            do_sample=True,
+            pad_token_id=tokenizer.eos_token_id
+        )
+
+        # Extract the assistant's reply. With chat-format input the pipeline
+        # returns the conversation as a list of messages; with plain-string
+        # input it returns a single string.
+        generated_text = response[0]['generated_text']
+        if isinstance(generated_text, list):
+            assistant_response = generated_text[-1]['content'].strip()
+        elif "Assistant:" in generated_text:
+            assistant_response = generated_text.split("Assistant:")[-1].strip()
+        else:
+            assistant_response = generated_text
+
+        return f"**Expert Type:** {expert_type}\n\n**Response:**\n{assistant_response}"
+
+    except Exception as e:
+        return f"❌ **Error generating response:** {str(e)}\n\nPlease try again or check the logs for more details."
+
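+# Illustration: a successful generate_response call returns a markdown string
+# of the form "**Expert Type:** Math\n\n**Response:**\n...", which the
+# Response panel built below renders.
+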
+def create_interface():
+    """Create Gradio interface"""
+
+    with gr.Blocks(title="Phi-3.5-MoE Expert Assistant", theme=gr.themes.Soft()) as demo:
+        gr.Markdown("# 🤖 Phi-3.5-MoE Expert Assistant")
+        gr.Markdown("""
+        This is a specialized AI assistant powered by Microsoft's Phi-3.5-MoE model.
+        It automatically routes your queries to the most appropriate expert:
+        - **Code Expert**: Programming, software development, algorithms
+        - **Math Expert**: Mathematics, calculations, problem solving
+        - **Reasoning Expert**: Logic, analysis, critical thinking
+        - **Multilingual Expert**: Translation and language assistance
+        - **General Expert**: General purpose assistance
+        """)
+
+        with gr.Row():
+            with gr.Column(scale=3):
+                query_input = gr.Textbox(
+                    label="Your Question",
+                    placeholder="Ask me anything...",
+                    lines=3
+                )
+
+                with gr.Row():
+                    max_tokens = gr.Slider(
+                        minimum=50,
+                        maximum=1000,
+                        value=500,
+                        step=50,
+                        label="Max Tokens"
+                    )
+                    temperature = gr.Slider(
+                        minimum=0.1,
+                        maximum=1.0,
+                        value=0.7,
+                        step=0.1,
+                        label="Temperature"
+                    )
+
+                submit_btn = gr.Button("Generate Response", variant="primary")
+
+            with gr.Column(scale=2):
+                response_output = gr.Markdown(label="Response")
+
+        # Example queries
+        gr.Markdown("### 💡 Example Queries")
+        examples = [
+            "How do I implement a binary search algorithm in Python?",
+            "What is the derivative of x² + 3x + 1?",
+            "Explain the logical reasoning behind the Monty Hall problem",
+            "Translate 'Hello, how are you?' to Spanish",
+            "What are the benefits of renewable energy?"
+        ]
+
+        gr.Examples(
+            examples=examples,
+            inputs=query_input
+        )
+
+        # Event handlers
+        submit_btn.click(
+            fn=generate_response,
+            inputs=[query_input, max_tokens, temperature],
+            outputs=response_output
+        )
+
+        query_input.submit(
+            fn=generate_response,
+            inputs=[query_input, max_tokens, temperature],
+            outputs=response_output
+        )
+
+    return demo
+
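+# Note: on shared hardware, enabling Gradio's request queue (demo.queue(),
+# called before launch) is a common optional addition for long generations.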
+# Create and launch the interface
+if __name__ == "__main__":
+    demo = create_interface()
+    demo.launch(server_name="0.0.0.0", server_port=7860)
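
For reference, a commit like this one can be created programmatically with the huggingface_hub client. A minimal sketch, assuming the file lives at ./app.py and a hypothetical Space id of user/phi-moe-demo:

    from huggingface_hub import HfApi

    api = HfApi()  # picks up the token saved by `huggingface-cli login`
    api.upload_file(
        path_or_fileobj="app.py",
        path_in_repo="app.py",
        repo_id="user/phi-moe-demo",
        repo_type="space",
        commit_message="Upload app.py with huggingface_hub",
    )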