ianshank committed (verified)
Commit 2716eda · 1 Parent(s): bda45f1

Upload app.py with huggingface_hub

Files changed (1)
  1. app.py +210 -0
app.py ADDED
@@ -0,0 +1,210 @@
import gradio as gr
import torch
from transformers import AutoTokenizer, AutoModelForCausalLM, pipeline
import warnings

# Suppress warnings for cleaner output
warnings.filterwarnings("ignore")

# Load model and tokenizer with error handling
model_name = "microsoft/Phi-3.5-MoE-instruct"

try:
    print("Loading tokenizer...")
    tokenizer = AutoTokenizer.from_pretrained(model_name, trust_remote_code=True)

    print("Loading model...")
    # Settings aimed at Hugging Face Spaces: float16 instead of bfloat16 for
    # broader hardware compatibility, low_cpu_mem_usage to reduce peak RAM
    model = AutoModelForCausalLM.from_pretrained(
        model_name,
        torch_dtype=torch.float16,
        device_map="auto",
        trust_remote_code=True,
        low_cpu_mem_usage=True
    )

    print("Creating pipeline...")
    pipe = pipeline(
        "text-generation",
        model=model,
        tokenizer=tokenizer,
        torch_dtype=torch.float16,
        device_map="auto",
        trust_remote_code=True
    )

    print("Model loaded successfully!")

except Exception as e:
    print(f"Error loading model: {e}")
    print("This is likely due to missing dependencies (einops, flash_attn) or memory constraints.")
    print("The app will run in fallback mode.")
    # No fallback pipeline is built; pipe/tokenizer stay None and
    # generate_response() serves canned expert responses instead.
    pipe = None
    tokenizer = None

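# Note (not in the committed file): Phi-3.5-MoE-instruct has 16 experts of
# ~3.8B parameters each (~42B total, ~6.6B active per token), so the float16
# weights alone are on the order of 80 GB. On free CPU Spaces the except
# branch above is therefore the expected path.
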
def classify_query_type(query: str) -> str:
    """Classify query to determine expert specialization"""
    query_lower = query.lower()

    expert_keywords = {
        "Code": ["programming", "software", "development", "coding", "algorithm", "python", "javascript", "java", "function", "code"],
        "Math": ["mathematics", "calculation", "equation", "formula", "statistics", "derivative", "integral", "algebra", "calculus", "math", "solve", "calculate"],
        "Reasoning": ["logic", "analysis", "reasoning", "problem-solving", "critical", "explain", "why", "how", "because"],
        "Multilingual": ["translation", "language", "multilingual", "localization", "translate", "spanish", "french", "german"],
        "General": ["general", "conversation", "assistance", "help", "hello", "hi", "what", "who", "when", "where"]
    }

    scores = {}
    for expert, keywords in expert_keywords.items():
        score = sum(1 for keyword in keywords if keyword in query_lower)
        scores[expert] = score

    if scores:
        best_expert = max(scores.items(), key=lambda x: x[1])[0]
        if scores[best_expert] > 0:
            return best_expert

    return "General"

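# Illustration (not in the original file): the classifier is a plain substring
# count over each keyword list, e.g.:
#   classify_query_type("How do I write a Python function?")  # -> "Code" (python, function)
#   classify_query_type("Solve this equation using algebra")  # -> "Math" (equation, algebra, solve)
#   classify_query_type("Tell me a story")                    # -> "General" (no keywords matched)
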
def generate_fallback_response(query: str, expert_type: str) -> str:
    """Generate a fallback response when the model is not available"""
    fallback_responses = {
        "Code": f"I'm a Code Expert, but the Phi-3.5-MoE model is currently unavailable. For your question about '{query}', I would typically provide detailed code examples and programming guidance. Please try again later when the model is loaded.",
        "Math": f"I'm a Math Expert, but the Phi-3.5-MoE model is currently unavailable. For your question about '{query}', I would typically solve mathematical problems step-by-step. Please try again later when the model is loaded.",
        "Reasoning": f"I'm a Reasoning Expert, but the Phi-3.5-MoE model is currently unavailable. For your question about '{query}', I would typically provide logical analysis and systematic problem-solving. Please try again later when the model is loaded.",
        "Multilingual": f"I'm a Multilingual Expert, but the Phi-3.5-MoE model is currently unavailable. For your question about '{query}', I would typically help with translations and language learning. Please try again later when the model is loaded.",
        "General": f"I'm a General Expert, but the Phi-3.5-MoE model is currently unavailable. For your question about '{query}', I would typically provide helpful and informative responses. Please try again later when the model is loaded."
    }
    return fallback_responses.get(expert_type, fallback_responses["General"])

def generate_response(query: str, max_tokens: int = 500, temperature: float = 0.7) -> str:
    """Generate response using Phi-3.5-MoE"""
    try:
        # Classify query type
        expert_type = classify_query_type(query)

        if pipe is None or tokenizer is None:
            return f"**Expert Type:** {expert_type}\n\n**Response:**\n{generate_fallback_response(query, expert_type)}"

        # Create system message based on expert type
        system_messages = {
            "Code": "You are an expert software engineer and programming assistant. Provide clear, well-commented code examples and explain programming concepts thoroughly.",
            "Math": "You are a mathematics expert. Solve problems step-by-step, show your work, and explain mathematical concepts clearly.",
            "Reasoning": "You are a logical reasoning expert. Break down complex problems, analyze them systematically, and provide clear explanations.",
            "Multilingual": "You are a multilingual expert. Help with translations, language learning, and cross-cultural communication.",
            "General": "You are a helpful AI assistant. Provide accurate, helpful, and informative responses to user questions."
        }

        system_message = system_messages.get(expert_type, system_messages["General"])

        # Format messages
        messages = [
            {"role": "system", "content": system_message},
            {"role": "user", "content": query}
        ]

        # Generate response (cast max_tokens since Gradio sliders may pass floats)
        response = pipe(
            messages,
            max_new_tokens=int(max_tokens),
            temperature=temperature,
            do_sample=True,
            pad_token_id=tokenizer.eos_token_id
        )

        # Extract response text. For chat-style input the pipeline returns the
        # whole conversation as a message list, so take the final (assistant)
        # message; keep the string handling as a fallback for plain-text output.
        generated_text = response[0]['generated_text']

        if isinstance(generated_text, list):
            assistant_response = str(generated_text[-1]["content"]).strip()
        elif "Assistant:" in generated_text:
            assistant_response = generated_text.split("Assistant:")[-1].strip()
        else:
            assistant_response = generated_text

        return f"**Expert Type:** {expert_type}\n\n**Response:**\n{assistant_response}"

    except Exception as e:
        return f"❌ **Error generating response:** {str(e)}\n\nPlease try again or check the logs for more details."

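# Illustrative note (not in the original file): for message-list input, recent
# transformers versions return roughly this shape, which is why the code above
# takes the last message's "content":
#   [{"generated_text": [
#       {"role": "system", "content": "..."},
#       {"role": "user", "content": "..."},
#       {"role": "assistant", "content": "<model reply>"}]}]
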
def create_interface():
    """Create Gradio interface"""

    with gr.Blocks(title="Phi-3.5-MoE Expert Assistant", theme=gr.themes.Soft()) as demo:
        gr.Markdown("# 🤖 Phi-3.5-MoE Expert Assistant")
        gr.Markdown("""
        This is a specialized AI assistant powered by Microsoft's Phi-3.5-MoE model.
        It automatically routes your queries to the most appropriate expert:
        - **Code Expert**: Programming, software development, algorithms
        - **Math Expert**: Mathematics, calculations, problem solving
        - **Reasoning Expert**: Logic, analysis, critical thinking
        - **Multilingual Expert**: Translation and language assistance
        - **General Expert**: General-purpose assistance
        """)

        with gr.Row():
            with gr.Column(scale=3):
                query_input = gr.Textbox(
                    label="Your Question",
                    placeholder="Ask me anything...",
                    lines=3
                )

                with gr.Row():
                    max_tokens = gr.Slider(
                        minimum=50,
                        maximum=1000,
                        value=500,
                        step=50,
                        label="Max Tokens"
                    )
                    temperature = gr.Slider(
                        minimum=0.1,
                        maximum=1.0,
                        value=0.7,
                        step=0.1,
                        label="Temperature"
                    )

                submit_btn = gr.Button("Generate Response", variant="primary")

            with gr.Column(scale=2):
                response_output = gr.Markdown(label="Response")

        # Example queries
        gr.Markdown("### 💡 Example Queries")
        examples = [
            "How do I implement a binary search algorithm in Python?",
            "What is the derivative of x² + 3x + 1?",
            "Explain the logical reasoning behind the Monty Hall problem",
            "Translate 'Hello, how are you?' to Spanish",
            "What are the benefits of renewable energy?"
        ]

        gr.Examples(
            examples=examples,
            inputs=query_input
        )

        # Event handlers
        submit_btn.click(
            fn=generate_response,
            inputs=[query_input, max_tokens, temperature],
            outputs=response_output
        )

        query_input.submit(
            fn=generate_response,
            inputs=[query_input, max_tokens, temperature],
            outputs=response_output
        )

    return demo

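# Optional tweak (not in the committed file): generation on shared Spaces
# hardware can take minutes, so Gradio's request queue is commonly enabled,
# e.g. `demo.queue().launch(...)` instead of a bare `launch(...)`.
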
# Create and launch the interface
if __name__ == "__main__":
    demo = create_interface()
    demo.launch(server_name="0.0.0.0", server_port=7860)
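
Not part of this commit: a minimal smoke-test sketch for the file above, assuming it is saved as app.py in the same directory. Importing app triggers the model-load attempt, which on small hardware falls through to the fallback path, so this mainly exercises the routing and fallback logic; the file name smoke_test.py is illustrative.

# smoke_test.py (illustrative, not committed)
from app import classify_query_type, generate_response

assert classify_query_type("debug this python function") == "Code"
assert classify_query_type("solve this equation using algebra") == "Math"

# With pipe=None (model unavailable), this returns the canned fallback text
print(generate_response("Translate 'good morning' to German", max_tokens=100))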