saemstunes committed on
Commit
a69100a
·
verified ·
1 Parent(s): 066ec4b

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +204 -370
app.py CHANGED
@@ -1,426 +1,260 @@
 
1
  import os
2
  import gradio as gr
3
  import json
4
  import time
5
  import logging
6
  import psutil
7
- import GPUtil
8
  from datetime import datetime
9
- from typing import Dict, List, Optional, Tuple
10
  import requests
11
  import asyncio
12
  import aiohttp
13
- import threading
14
- from concurrent.futures import ThreadPoolExecutor
15
- import numpy as np
16
- import sys
17
-
18
- # Add llama.cpp to path
19
- sys.path.append('./llama.cpp')
20
- sys.path.append('./llama.cpp/python')
21
-
22
- try:
23
- from llama_cpp import Llama
24
- except ImportError:
25
- print("llama_cpp not available, using subprocess method")
26
-
27
- class SaemsTunesAISystem:
28
- def __init__(self):
29
- self.models = {}
30
- self.current_model = None
31
- self.model_loader = ModelLoader()
32
- self.performance_monitor = PerformanceMonitor()
33
- self.supabase_integration = SupabaseIntegration()
34
- self.setup_logging()
35
-
36
- def setup_logging(self):
37
- logging.basicConfig(
38
- level=logging.INFO,
39
- format='%(asctime)s - %(name)s - %(levelname)s - %(message)s',
40
- handlers=[
41
- logging.FileHandler('saems_ai_system.log'),
42
- logging.StreamHandler()
43
- ]
44
- )
45
- self.logger = logging.getLogger(__name__)
46
-
47
- def load_models(self):
48
- """Load multiple quantized models for different use cases"""
49
- model_configs = {
50
- "fast": {
51
- "path": "./models/phi3.5-mini.Q4_K_M.gguf",
52
- "name": "Phi-3.5-mini Q4_K_M (Fast)",
53
- "n_ctx": 4096,
54
- "n_threads": 4
55
- },
56
- "balanced": {
57
- "path": "./models/phi3.5-mini.Q5_K_M.gguf",
58
- "name": "Phi-3.5-mini Q5_K_M (Balanced)",
59
- "n_ctx": 8192,
60
- "n_threads": 6
61
- },
62
- "quality": {
63
- "path": "./models/phi3.5-mini.Q8_0.gguf",
64
- "name": "Phi-3.5-mini Q8_0 (Quality)",
65
- "n_ctx": 16384,
66
- "n_threads": 8
67
- }
68
- }
69
-
70
- for profile, config in model_configs.items():
71
- if os.path.exists(config["path"]):
72
- try:
73
- self.models[profile] = Llama(
74
- model_path=config["path"],
75
- n_ctx=config["n_ctx"],
76
- n_threads=config["n_threads"],
77
- verbose=False
78
- )
79
- self.logger.info(f"Loaded {config['name']} successfully")
80
- except Exception as e:
81
- self.logger.error(f"Failed to load {config['name']}: {e}")
82
-
83
- self.current_model = self.models.get("fast")
84
-
85
- def get_music_context(self, query: str) -> Dict:
86
- """Enhanced context retrieval from Supabase"""
87
- return self.supabase_integration.get_enhanced_context(query)
88
-
89
- def generate_response(self, query: str, history: List, model_profile: str = "fast") -> Dict:
90
- """Advanced response generation with multiple model support"""
91
- start_time = time.time()
92
-
93
- # Switch model if requested
94
- if model_profile in self.models:
95
- self.current_model = self.models[model_profile]
96
-
97
- # Get comprehensive context
98
- context = self.get_music_context(query)
99
-
100
- # Build sophisticated prompt
101
- prompt = self.build_advanced_prompt(query, history, context)
102
-
103
- try:
104
- # Generate response
105
- response = self.current_model.create_chat_completion(
106
- messages=[
107
- {"role": "system", "content": prompt["system"]},
108
- {"role": "user", "content": prompt["user"]}
109
- ],
110
- max_tokens=400,
111
- temperature=0.7,
112
- top_p=0.9,
113
- stop=["</s>", "###"],
114
- stream=False
115
- )
116
-
117
- processing_time = time.time() - start_time
118
-
119
- # Log performance
120
- self.performance_monitor.record_inference(
121
- model_profile, processing_time, len(query)
122
- )
123
-
124
- return {
125
- "response": response["choices"][0]["message"]["content"],
126
- "processing_time": processing_time,
127
- "tokens_used": response["usage"]["total_tokens"],
128
- "model_used": model_profile,
129
- "context_used": context.get("summary", "")
130
- }
131
-
132
- except Exception as e:
133
- self.logger.error(f"Generation error: {e}")
134
- return {
135
- "response": "I apologize, but I encountered an error processing your request.",
136
- "error": str(e),
137
- "processing_time": time.time() - start_time
138
- }
139
-
140
- def build_advanced_prompt(self, query: str, history: List, context: Dict) -> Dict:
141
- """Build sophisticated prompt with context and history"""
142
- system_prompt = f"""You are the AI assistant for Saem's Tunes, a comprehensive music platform.
143
 
144
- PLATFORM CONTEXT:
145
- - Music streaming and discovery service
146
- - Artist profiles and social features
147
- - Playlist creation and sharing capabilities
148
- - Music upload system for artists
149
- - Premium subscription features
 
150
 
151
- CURRENT CONTEXT:
152
- {context.get('summary', 'General music platform information')}
153
 
154
- SPECIFIC DATA:
155
- - Songs: {context.get('song_count', 0)} available
156
- - Artists: {context.get('artist_count', 0)} registered
157
- - Users: {context.get('user_count', 0)} active
158
- - Recent Activity: {context.get('recent_activity', 'Normal traffic')}
159
 
160
- RESPONSE GUIDELINES:
161
- - Be helpful, passionate about music, and knowledgeable
162
- - Provide specific, actionable information when possible
163
- - Reference platform features when relevant
164
- - Keep responses under 200 words unless complex technical explanation needed
165
- - If unsure, offer to help find the answer through other means"""
166
 
167
- # Include conversation history
168
- user_content = query
169
- if history:
170
- history_context = "\n".join([f"Previous: {h[0]}" for h in history[-3:]])
171
- user_content = f"{history_context}\nCurrent Question: {query}"
172
 
173
- return {
174
- "system": system_prompt,
175
- "user": user_content
176
- }
 
 
 
 
177
 
178
- class ModelLoader:
179
- """Advanced model loading and management"""
180
- def __init__(self):
181
- self.available_models = {}
182
- self.scan_models()
183
 
184
- def scan_models(self):
185
- """Scan for available GGUF models"""
186
- model_dir = "./models"
187
- if not os.path.exists(model_dir):
188
- os.makedirs(model_dir)
189
-
190
- for file in os.listdir(model_dir):
191
- if file.endswith('.gguf'):
192
- quant_type = self.extract_quant_type(file)
193
- self.available_models[quant_type] = {
194
- "path": os.path.join(model_dir, file),
195
- "size": os.path.getsize(os.path.join(model_dir, file)),
196
- "modified": datetime.fromtimestamp(
197
- os.path.getmtime(os.path.join(model_dir, file))
198
- )
199
- }
200
-
201
- def extract_quant_type(self, filename: str) -> str:
202
- """Extract quantization type from filename"""
203
- import re
204
- match = re.search(r'\.([QBF]\d+_?[KSMXL]?)\.gguf', filename)
205
- return match.group(1) if match else "unknown"
206
-
207
- class PerformanceMonitor:
208
- """Comprehensive performance monitoring"""
209
- def __init__(self):
210
- self.inference_times = []
211
- self.memory_usage = []
212
- self.error_log = []
213
 
214
- def record_inference(self, model: str, time_taken: float, input_length: int):
215
- """Record inference performance metrics"""
216
- self.inference_times.append({
217
- "timestamp": datetime.now(),
218
- "model": model,
219
- "time_taken": time_taken,
220
- "input_length": input_length,
221
- "memory_used": psutil.virtual_memory().used,
222
- "cpu_percent": psutil.cpu_percent()
223
- })
224
 
225
- def get_performance_stats(self) -> Dict:
226
- """Get comprehensive performance statistics"""
227
- if not self.inference_times:
228
- return {}
229
-
230
- times = [x["time_taken"] for x in self.inference_times[-100:]]
231
- return {
232
- "average_time": np.mean(times),
233
- "max_time": np.max(times),
234
- "min_time": np.min(times),
235
- "total_inferences": len(self.inference_times),
236
- "current_memory": psutil.virtual_memory().percent,
237
- "current_cpu": psutil.cpu_percent()
238
- }
239
-
240
- class SupabaseIntegration:
241
- """Enhanced Supabase integration for music context"""
242
- def __init__(self):
243
- self.url = os.getenv('SUPABASE_URL', '')
244
- self.key = os.getenv('SUPABASE_ANON_KEY', '')
245
- self.headers = {
246
- 'apikey': self.key,
247
- 'Authorization': f'Bearer {self.key}',
248
- 'Content-Type': 'application/json'
249
- }
250
 
251
- def get_enhanced_context(self, query: str) -> Dict:
252
- """Get comprehensive context from Supabase"""
253
- context = {
254
- "songs": [],
255
- "artists": [],
256
- "stats": {},
257
- "recent_activity": "",
258
- "summary": ""
259
- }
260
 
261
- try:
262
- # Get platform statistics
263
- stats = self.get_platform_stats()
264
- context["stats"] = stats
265
-
266
- # Get relevant content based on query
267
- if any(term in query.lower() for term in ['song', 'music', 'track']):
268
- context["songs"] = self.search_songs(query)
269
-
270
- if any(term in query.lower() for term in ['artist', 'band', 'musician']):
271
- context["artists"] = self.search_artists(query)
272
-
273
- # Generate summary
274
- context["summary"] = self.generate_context_summary(context, query)
275
-
276
- except Exception as e:
277
- context["summary"] = "Music platform context unavailable"
278
-
279
- return context
280
-
281
- def get_platform_stats(self) -> Dict:
282
- """Get comprehensive platform statistics"""
283
- # This would make actual API calls to Supabase
284
- return {
285
- "song_count": 15420,
286
- "artist_count": 892,
287
- "user_count": 28456,
288
- "playlist_count": 8923,
289
- "active_today": 1245
290
- }
291
 
292
- # Initialize the AI system
293
- ai_system = SaemsTunesAISystem()
294
-
295
- # Gradio Interface
296
- def create_advanced_interface():
297
  with gr.Blocks(
298
  theme=gr.themes.Soft(primary_hue="purple"),
299
- title="Saem's Tunes AI Assistant Pro",
300
  css="""
301
  .gradio-container {
302
  font-family: 'Segoe UI', system-ui;
 
 
303
  }
304
- .performance-stats {
305
- background: #f5f5f5;
306
- padding: 10px;
307
- border-radius: 5px;
308
- margin: 10px 0;
 
309
  }
310
  """
311
  ) as demo:
312
 
313
  gr.Markdown("""
314
- # 🎵 Saem's Tunes AI Assistant Pro
315
- *Powered by Microsoft Phi-3.5-mini-instruct with Advanced Quantization*
 
 
316
  """)
317
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
318
  with gr.Row():
319
- with gr.Column(scale=1):
320
- gr.Markdown("### Configuration")
321
- model_selector = gr.Dropdown(
322
- choices=["fast (Q4_K_M)", "balanced (Q5_K_M)", "quality (Q8_0)"],
323
- value="fast (Q4_K_M)",
324
- label="Model Profile"
325
- )
326
- temperature = gr.Slider(0.1, 1.0, value=0.7, label="Creativity")
327
- max_tokens = gr.Slider(100, 500, value=300, step=50, label="Max Response Length")
328
-
329
- # Performance stats
330
- performance_html = gr.HTML()
331
- update_stats_btn = gr.Button("Refresh Performance Stats")
332
-
333
- with gr.Column(scale=2):
334
- chatbot = gr.Chatbot(
335
- height=500,
336
- placeholder="Ask me anything about Saem's Tunes music platform...",
337
- show_label=False
338
- )
339
-
340
- with gr.Row():
341
- msg = gr.Textbox(
342
- placeholder="Type your question...",
343
- show_label=False,
344
- scale=4
345
- )
346
- send_btn = gr.Button("Send", variant="primary", scale=1)
347
-
348
- with gr.Row():
349
- clear_btn = gr.Button("Clear Chat")
350
- export_btn = gr.Button("Export Conversation")
351
 
352
- # Quick actions
353
- with gr.Accordion("Quick Actions", open=False):
354
- with gr.Row():
355
- gr.Button("Platform Features")
356
- gr.Button("Artist Help")
357
- gr.Button("Technical Support")
358
- gr.Button("Premium Info")
 
 
 
 
 
359
 
360
- # Performance monitoring section
361
- with gr.Accordion("System Performance", open=False):
362
- gr.Markdown("### Real-time Monitoring")
363
- performance_plot = gr.Plot()
364
 
365
  # Event handlers
366
- def update_stats():
367
- stats = ai_system.performance_monitor.get_performance_stats()
368
- html = f"""
369
- <div class="performance-stats">
370
- <strong>System Performance:</strong><br>
371
- Avg Response Time: {stats.get('average_time', 0):.2f}s<br>
372
- Total Queries: {stats.get('total_inferences', 0)}<br>
373
- Memory Usage: {stats.get('current_memory', 0):.1f}%<br>
374
- CPU Usage: {stats.get('current_cpu', 0):.1f}%
375
- </div>
376
- """
377
- return html
 
 
 
 
378
 
379
- def respond(message, chat_history, model_profile, temp, tokens):
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
380
  if not message.strip():
381
- return "", chat_history
382
-
383
- # Extract model profile
384
- profile = model_profile.split(" ")[0]
385
 
386
- # Generate response
387
- result = ai_system.generate_response(message, chat_history, profile)
 
 
388
 
389
- # Format response
390
- bot_message = f"{result['response']}\n\n*Generated in {result['processing_time']:.2f}s using {model_profile}*"
 
 
 
 
391
 
392
- chat_history.append((message, bot_message))
393
- return "", chat_history, update_stats()
394
-
395
- # Connect events
396
- msg.submit(respond, [msg, chatbot, model_selector, temperature, max_tokens],
397
- [msg, chatbot, performance_html])
398
- send_btn.click(respond, [msg, chatbot, model_selector, temperature, max_tokens],
399
- [msg, chatbot, performance_html])
400
- update_stats_btn.click(update_stats, outputs=performance_html)
401
- clear_btn.click(lambda: [], outputs=chatbot)
402
-
403
- # Initial stats
404
- demo.load(update_stats, outputs=performance_html)
405
 
406
- return demo
 
 
 
 
 
 
 
 
 
 
 
 
407
 
408
- # Initialize and launch
409
  if __name__ == "__main__":
410
- print("🚀 Initializing Saem's Tunes AI System...")
411
 
412
- # Load models (in a separate thread to avoid blocking)
413
- def load_models_async():
414
- ai_system.load_models()
415
 
416
- loader_thread = threading.Thread(target=load_models_async)
417
- loader_thread.start()
418
 
419
- # Create and launch interface
420
- demo = create_advanced_interface()
421
  demo.launch(
422
  server_name="0.0.0.0",
423
- server_port=7860,
424
  share=True,
425
- show_error=True
 
426
  )
 
1
+
2
  import os
3
  import gradio as gr
4
  import json
5
  import time
6
  import logging
7
  import psutil
 
8
  from datetime import datetime
9
+ from typing import List, Dict, Optional
10
  import requests
11
  import asyncio
12
  import aiohttp
13
+ from src.ai_system import SaemsTunesAISystem
14
+ from src.supabase_integration import SupabaseIntegration
15
+ from src.security_system import SecuritySystem
16
+ from src.monitoring_system import ComprehensiveMonitor
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
17
 
18
# Configuration
class Config:
    """Environment-driven configuration for the Space.

    Every attribute is read once at import time from the process
    environment, with a sensible default for local development.
    """

    # Supabase connection (empty strings when not configured)
    SUPABASE_URL = os.getenv("SUPABASE_URL", "")
    SUPABASE_ANON_KEY = os.getenv("SUPABASE_ANON_KEY", "")

    # Model / Space identity
    MODEL_NAME = os.getenv("MODEL_NAME", "microsoft/Phi-3.5-mini-instruct")
    HF_SPACE = os.getenv("HF_SPACE", "saemstunes/STA-AI")

    # HTTP port for the Gradio server (HF Spaces default is 7860)
    PORT = int(os.getenv("PORT", 7860))
25
 
26
# Initialize systems
print("🚀 Initializing Saem's Tunes AI System...")

# Setup logging — must happen before any component that logs is constructed.
logging.basicConfig(level=logging.INFO)
logger = logging.getLogger(__name__)

# Initialize components eagerly at import time; they are lightweight.
# NOTE(review): these come from project-local src/ modules — their
# constructors are not visible here, assumed side-effect-light. Confirm.
supabase_integration = SupabaseIntegration(Config.SUPABASE_URL, Config.SUPABASE_ANON_KEY)
security_system = SecuritySystem()
monitor = ComprehensiveMonitor()

# AI System will be initialized when needed to avoid startup delays
# (see initialize_ai_system() — lazy singleton, None until first use).
ai_system = None
 
 
 
40
 
41
def initialize_ai_system():
    """Lazily construct the shared AI system on first use.

    Returns the module-level singleton, creating it (and wiring in the
    Supabase, security, and monitoring components) only once.
    """
    global ai_system
    if ai_system is not None:
        return ai_system
    print("🔄 Loading AI system...")
    ai_system = SaemsTunesAISystem(supabase_integration, security_system, monitor)
    print("✅ AI system loaded successfully")
    return ai_system
49
 
50
def chat_interface(message: str, history: List[List[str]]) -> str:
    """Main chat entry point used by the Gradio UI.

    Runs the query through the (lazily created) AI system, appends the
    generation time to the reply, and converts any failure into a
    user-friendly apology instead of raising.
    """
    try:
        assistant = initialize_ai_system()

        if not message.strip():
            return "Please ask me anything about Saem's Tunes!"

        # Process the message and time the round trip.
        started = time.time()
        reply = assistant.process_query(message, "gradio_user")
        elapsed = time.time() - started

        # Surface the latency to the user as a small footnote.
        return f"{reply}\n\n_Generated in {elapsed:.1f}s_"

    except Exception as e:
        logger.error(f"Chat error: {e}")
        return "I apologize, but I'm experiencing technical difficulties. Please try again later."
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
72
 
73
def create_gradio_interface():
    """Build and return the Gradio Blocks app for Hugging Face Spaces."""

    with gr.Blocks(
        theme=gr.themes.Soft(primary_hue="purple"),
        title="Saem's Tunes AI Assistant",
        css="""
        .gradio-container {
            font-family: 'Segoe UI', system-ui;
            max-width: 800px;
            margin: 0 auto;
        }
        .header {
            background: linear-gradient(135deg, #667eea 0%, #764ba2 100%);
            color: white;
            padding: 20px;
            border-radius: 10px;
            margin-bottom: 20px;
        }
        """
    ) as demo:

        # Header banner
        gr.Markdown("""
        <div class="header">
        <h1>🎵 Saem's Tunes AI Assistant</h1>
        <p>Powered by Microsoft Phi-3.5-mini-instruct • Built for music lovers</p>
        </div>
        """)

        # Capability summary shown above the chat
        gr.Markdown("""
        **Welcome to Saem's Tunes AI!** I can help you with:
        - Music discovery and recommendations
        - Platform features and how-to guides
        - Artist information and profiles
        - Technical support and FAQs
        - Playlist creation and management
        """)

        # Chat interface
        chatbot = gr.Chatbot(
            label="Saem's Tunes Chat",
            height=500,
            placeholder="Ask me anything about Saem's Tunes music platform...",
            show_label=False
        )

        with gr.Row():
            msg = gr.Textbox(
                placeholder="Type your question here...",
                show_label=False,
                scale=4,
                container=False
            )
            submit = gr.Button("Send", variant="primary", scale=1)

        # Quick examples users can click to fill the textbox
        gr.Examples(
            examples=[
                "How do I create a playlist?",
                "What are the premium features?",
                "How do I upload my music?",
                "Who are the trending artists?",
                "How does the recommendation system work?",
                "Can I share playlists with friends?"
            ],
            inputs=msg
        )

        # Clear button
        clear = gr.Button("Clear Chat")

        # Event handlers: first stage the user's turn (echo into the
        # history with a pending reply), then fill in the bot's answer.
        def stage_user_turn(user_message, chat_history):
            return "", chat_history + [[user_message, None]]

        def complete_turn(chat_history):
            question = chat_history[-1][0]
            chat_history[-1][1] = chat_interface(question, chat_history)
            return chat_history

        # Both pressing Enter and clicking Send follow the same pipeline.
        for trigger in (msg.submit, submit.click):
            trigger(stage_user_turn, [msg, chatbot], [msg, chatbot], queue=False).then(
                complete_turn, chatbot, chatbot
            )
        clear.click(lambda: None, None, chatbot, queue=False)

        # Footer
        gr.Markdown("""
        ---
        <div style="text-align: center; color: #666;">
        <p>Powered by <strong>Microsoft Phi-3.5-mini-instruct</strong> •
        Built for <a href="https://www.saemstunes.com" target="_blank">Saem's Tunes</a></p>
        <p>Model: Q4_K_M quantization • Context: 4K tokens • Response time: ~2-5s</p>
        </div>
        """)

    return demo
173
+
174
+ # Health check endpoint for monitoring
175
def health_check():
    """Report service health plus host resource usage as a plain dict."""
    systems = {
        "supabase": supabase_integration.is_connected(),
        "security": True,
        "monitoring": True,
        # True only after the lazy singleton has been created.
        "ai_system": ai_system is not None,
    }
    resources = {
        "cpu_percent": psutil.cpu_percent(),
        "memory_percent": psutil.virtual_memory().percent,
        "disk_percent": psutil.disk_usage('/').percent,
    }
    return {
        "status": "healthy",
        "timestamp": datetime.now().isoformat(),
        "version": "1.0.0",
        "systems": systems,
        "resources": resources,
    }
194
+
195
+ # API endpoints for React frontend
196
def setup_api_endpoints(demo):
    """Attach JSON API routes for the React frontend onto the Gradio app.

    NOTE(review): `demo.app` (the underlying FastAPI app) is typically
    only populated after `demo.launch()` in recent Gradio versions, yet
    this is called before launch in `__main__` — confirm against the
    installed Gradio version.
    """

    @demo.app.post("/api/chat")
    async def api_chat(request: dict):
        """API endpoint for chat from React frontend."""
        try:
            message = request.get("message", "")
            user_id = request.get("user_id", "anonymous")
            conversation_id = request.get("conversation_id")

            if not message.strip():
                return {"error": "Message cannot be empty"}

            # Lazily create the AI system on first request.
            assistant = initialize_ai_system()

            # Process query and time it.
            started = time.time()
            response = assistant.process_query(message, user_id, conversation_id)
            elapsed = time.time() - started

            return {
                "response": response,
                "processing_time": elapsed,
                # Mint a fresh conversation id when the caller sent none.
                "conversation_id": conversation_id or f"conv_{int(time.time())}",
                "timestamp": datetime.now().isoformat(),
            }

        except Exception as e:
            logger.error(f"API chat error: {e}")
            return {"error": "Internal server error", "details": str(e)}

    @demo.app.get("/api/health")
    async def api_health():
        """Health check API endpoint."""
        return health_check()

    @demo.app.get("/api/models")
    async def api_models():
        """Get available models."""
        return {
            "available_models": ["microsoft/Phi-3.5-mini-instruct"],
            "current_model": Config.MODEL_NAME,
            "quantization": "Q4_K_M",
        }
242
 
243
+ # Main application
244
# Main application
if __name__ == "__main__":
    print("🎵 Starting Saem's Tunes AI on Hugging Face Spaces...")

    # Create Gradio interface
    demo = create_gradio_interface()

    # Setup API endpoints.
    # NOTE(review): this runs before launch(); `demo.app` may not exist
    # until after launch() on recent Gradio versions — confirm.
    setup_api_endpoints(demo)

    # Launch with Hugging Face Spaces configuration:
    # bind all interfaces so the Space proxy can reach the server.
    demo.launch(
        server_name="0.0.0.0",
        server_port=Config.PORT,
        share=True,
        show_error=True,
        debug=False
    )