| import gradio as gr |
| import torch |
| from transformers import AutoTokenizer, AutoModelForCausalLM, pipeline |
| import json |
| import warnings |
| from typing import List, Dict, Any |
|
|
| |
# Suppress library warnings (transformers/torch emit many deprecation notices during model load).
warnings.filterwarnings("ignore")


# Hugging Face model id for Microsoft's Phi-3.5 Mixture-of-Experts instruct model.
model_name = "microsoft/Phi-3.5-MoE-instruct"
|
|
# Attempt to load the model at import time. On failure (missing deps such as
# einops/flash_attn, or insufficient memory), `pipe` and `tokenizer` are set to
# None and generate_response() falls back to canned replies.
try:
    print("Loading tokenizer...")
    tokenizer = AutoTokenizer.from_pretrained(model_name, trust_remote_code=True)

    print("Loading model...")
    # float16 + device_map="auto" shards the model across available devices;
    # low_cpu_mem_usage avoids materializing a full fp32 copy on the host.
    model = AutoModelForCausalLM.from_pretrained(
        model_name,
        torch_dtype=torch.float16,
        device_map="auto",
        trust_remote_code=True,
        low_cpu_mem_usage=True
    )

    print("Creating pipeline...")
    # Text-generation pipeline reusing the already-loaded model and tokenizer.
    pipe = pipeline(
        "text-generation",
        model=model,
        tokenizer=tokenizer,
        torch_dtype=torch.float16,
        device_map="auto",
        trust_remote_code=True
    )

    print("Model loaded successfully!")

except Exception as e:
    # Broad catch is deliberate here: any load failure should degrade the app
    # to fallback mode rather than crash the whole process at import time.
    print(f"Error loading model: {e}")
    print("This is likely due to missing dependencies (einops, flash_attn) or memory constraints.")
    print("The model will run in fallback mode.")
    # Sentinels checked by generate_response() to detect fallback mode.
    pipe = None
    tokenizer = None
|
|
def classify_query_type(query: str) -> str:
    """Route *query* to the expert with the most keyword hits.

    Each expert scores one point per keyword that appears as a substring of
    the lowercased query. The first expert (in table order) with the highest
    positive score wins; if nothing matches, "General" is returned.
    """
    text = query.lower()

    expert_keywords = {
        "Code": ["programming", "software", "development", "coding", "algorithm", "python", "javascript", "java", "function", "code"],
        "Math": ["mathematics", "calculation", "equation", "formula", "statistics", "derivative", "integral", "algebra", "calculus", "math", "solve", "calculate"],
        "Reasoning": ["logic", "analysis", "reasoning", "problem-solving", "critical", "explain", "why", "how", "because"],
        "Multilingual": ["translation", "language", "multilingual", "localization", "translate", "spanish", "french", "german"],
        "General": ["general", "conversation", "assistance", "help", "hello", "hi", "what", "who", "when", "where"]
    }

    # Single pass keeps the first expert seen at the best score, matching the
    # tie-breaking of max() over insertion-ordered dict items.
    best_expert, best_score = "General", 0
    for expert, keywords in expert_keywords.items():
        hits = sum(keyword in text for keyword in keywords)
        if hits > best_score:
            best_expert, best_score = expert, hits

    return best_expert
|
|
def generate_fallback_response(query: str, expert_type: str) -> str:
    """Return a canned apology used when the model failed to load.

    The message names the routed expert, echoes the query, and describes what
    that expert would normally do. Unknown expert types fall back to the
    "General" wording.
    """
    # What each expert "would typically" do — spliced into a shared template.
    specialties = {
        "Code": "provide detailed code examples and programming guidance",
        "Math": "solve mathematical problems step-by-step",
        "Reasoning": "provide logical analysis and systematic problem-solving",
        "Multilingual": "help with translations and language learning",
        "General": "provide helpful and informative responses",
    }
    label = expert_type if expert_type in specialties else "General"
    return (
        f"I'm a {label} Expert, but the Phi-3.5-MoE model is currently unavailable. "
        f"For your question about '{query}', I would typically {specialties[label]}. "
        f"Please try again later when the model is loaded."
    )
|
|
def generate_response(query: str, max_tokens: int = 500, temperature: float = 0.7) -> str:
    """Generate a Markdown-formatted answer using Phi-3.5-MoE.

    Routes the query to an expert persona via classify_query_type(), builds a
    chat prompt with a matching system message, and runs the global `pipe`.
    Falls back to a canned reply when the model failed to load.

    Args:
        query: The user's question.
        max_tokens: Maximum number of new tokens to generate.
        temperature: Sampling temperature (higher = more random).

    Returns:
        Markdown string with the expert type and the model's (or fallback)
        response; on failure, an error message instead of raising.
    """
    try:
        expert_type = classify_query_type(query)

        # Model failed to load at import time — degrade gracefully.
        # NOTE: use real newlines ("\n"), not escaped backslashes ("\\n"),
        # so the Markdown component renders line breaks instead of literal text.
        if pipe is None or tokenizer is None:
            return (
                f"**Expert Type:** {expert_type}\n\n"
                f"**Response:**\n{generate_fallback_response(query, expert_type)}"
            )

        system_messages = {
            "Code": "You are an expert software engineer and programming assistant. Provide clear, well-commented code examples and explain programming concepts thoroughly.",
            "Math": "You are a mathematics expert. Solve problems step-by-step, show your work, and explain mathematical concepts clearly.",
            "Reasoning": "You are a logical reasoning expert. Break down complex problems, analyze them systematically, and provide clear explanations.",
            "Multilingual": "You are a multilingual expert. Help with translations, language learning, and cross-cultural communication.",
            "General": "You are a helpful AI assistant. Provide accurate, helpful, and informative responses to user questions."
        }
        system_message = system_messages.get(expert_type, system_messages["General"])

        messages = [
            {"role": "system", "content": system_message},
            {"role": "user", "content": query}
        ]

        response = pipe(
            messages,
            max_new_tokens=max_tokens,
            temperature=temperature,
            do_sample=True,
            pad_token_id=tokenizer.eos_token_id
        )

        generated = response[0]['generated_text']

        # With chat (list-of-messages) input, recent transformers versions
        # return the whole conversation as a list of {"role", "content"}
        # dicts — the model's reply is the last entry. Older/string outputs
        # are handled by stripping everything before the final "Assistant:".
        if isinstance(generated, list):
            last = generated[-1]
            assistant_response = (last.get("content") or "").strip() if isinstance(last, dict) else str(last)
        elif "Assistant:" in generated:
            assistant_response = generated.split("Assistant:")[-1].strip()
        else:
            assistant_response = generated

        return f"**Expert Type:** {expert_type}\n\n**Response:**\n{assistant_response}"

    except Exception as e:
        # UI boundary: surface the error in the response pane rather than
        # crashing the Gradio handler.
        return f"❌ **Error generating response:** {str(e)}\n\nPlease try again or check the logs for more details."
|
|
def create_interface():
    """Build and return the Gradio Blocks UI for the expert assistant."""
    with gr.Blocks(title="Phi-3.5-MoE Expert Assistant", theme=gr.themes.Soft()) as demo:
        gr.Markdown("# 🤖 Phi-3.5-MoE Expert Assistant")
        gr.Markdown("""
        This is a specialized AI assistant powered by Microsoft's Phi-3.5-MoE model.
        It automatically routes your queries to the most appropriate expert:
        - **Code Expert**: Programming, software development, algorithms
        - **Math Expert**: Mathematics, calculations, problem solving
        - **Reasoning Expert**: Logic, analysis, critical thinking
        - **Multilingual Expert**: Translation and language assistance
        - **General Expert**: General purpose assistance
        """)

        with gr.Row():
            # Left column: input controls.
            with gr.Column(scale=3):
                question_box = gr.Textbox(
                    label="Your Question",
                    placeholder="Ask me anything...",
                    lines=3
                )

                with gr.Row():
                    tokens_slider = gr.Slider(
                        minimum=50, maximum=1000, value=500, step=50,
                        label="Max Tokens"
                    )
                    temp_slider = gr.Slider(
                        minimum=0.1, maximum=1.0, value=0.7, step=0.1,
                        label="Temperature"
                    )

                generate_btn = gr.Button("Generate Response", variant="primary")

            # Right column: rendered answer.
            with gr.Column(scale=2):
                answer_panel = gr.Markdown(label="Response")

        gr.Markdown("### 💡 Example Queries")
        gr.Examples(
            examples=[
                "How do I implement a binary search algorithm in Python?",
                "What is the derivative of x² + 3x + 1?",
                "Explain the logical reasoning behind the Monty Hall problem",
                "Translate 'Hello, how are you?' to Spanish",
                "What are the benefits of renewable energy?"
            ],
            inputs=question_box
        )

        # Both the button click and pressing Enter in the textbox run the
        # same handler with the same inputs/outputs.
        for trigger in (generate_btn.click, question_box.submit):
            trigger(
                fn=generate_response,
                inputs=[question_box, tokens_slider, temp_slider],
                outputs=answer_panel
            )

    return demo
|
|
| |
if __name__ == "__main__":
    # Bind to all interfaces so the app is reachable inside a container.
    app = create_interface()
    app.launch(server_name="0.0.0.0", server_port=7860)
|
|