Spaces:
Running
Running
Update app.py
Browse files
app.py
CHANGED
|
@@ -24,36 +24,38 @@ from src.monitoring_system import ComprehensiveMonitor
|
|
| 24 |
class Config:
|
| 25 |
SUPABASE_URL = os.getenv("SUPABASE_URL", "")
|
| 26 |
SUPABASE_ANON_KEY = os.getenv("SUPABASE_ANON_KEY", "")
|
| 27 |
-
MODEL_NAME = os.getenv("MODEL_NAME", "
|
|
|
|
|
|
|
| 28 |
HF_SPACE = os.getenv("HF_SPACE", "saemstunes/STA-AI")
|
| 29 |
PORT = int(os.getenv("PORT", 8000))
|
| 30 |
LOG_LEVEL = os.getenv("LOG_LEVEL", "INFO")
|
| 31 |
MAX_RESPONSE_LENGTH = int(os.getenv("MAX_RESPONSE_LENGTH", "500"))
|
|
|
|
|
|
|
|
|
|
| 32 |
ENABLE_MONITORING = os.getenv("ENABLE_MONITORING", "true").lower() == "true"
|
| 33 |
|
| 34 |
logging.basicConfig(
|
| 35 |
level=getattr(logging, Config.LOG_LEVEL),
|
| 36 |
format='%(asctime)s - %(name)s - %(levelname)s - %(message)s',
|
| 37 |
-
handlers=[
|
| 38 |
-
logging.StreamHandler(),
|
| 39 |
-
logging.FileHandler('saems_ai.log')
|
| 40 |
-
]
|
| 41 |
)
|
| 42 |
logger = logging.getLogger(__name__)
|
| 43 |
|
| 44 |
-
# Global systems and initialization state
|
| 45 |
supabase_integration = None
|
| 46 |
security_system = None
|
| 47 |
monitor = None
|
| 48 |
ai_system = None
|
| 49 |
systems_ready = False
|
| 50 |
initialization_complete = False
|
|
|
|
|
|
|
| 51 |
|
| 52 |
def initialize_systems():
|
| 53 |
-
|
| 54 |
-
global supabase_integration, security_system, monitor, ai_system, systems_ready, initialization_complete
|
| 55 |
|
| 56 |
-
logger.info("🚀
|
| 57 |
|
| 58 |
try:
|
| 59 |
supabase_integration = AdvancedSupabaseIntegration(
|
|
@@ -65,41 +67,146 @@ def initialize_systems():
|
|
| 65 |
security_system = AdvancedSecuritySystem()
|
| 66 |
logger.info("✅ Security system initialized")
|
| 67 |
|
| 68 |
-
monitor = ComprehensiveMonitor()
|
| 69 |
logger.info("✅ Monitoring system initialized")
|
| 70 |
|
| 71 |
ai_system = SaemsTunesAISystem(
|
| 72 |
-
supabase_integration,
|
| 73 |
-
security_system,
|
| 74 |
-
monitor,
|
| 75 |
model_name=Config.MODEL_NAME,
|
| 76 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 77 |
)
|
| 78 |
logger.info("✅ AI system initialized")
|
| 79 |
|
| 80 |
-
|
| 81 |
-
|
| 82 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
| 83 |
|
| 84 |
return True
|
| 85 |
|
| 86 |
except Exception as e:
|
| 87 |
-
|
|
|
|
|
|
|
| 88 |
initialization_complete = True
|
| 89 |
return False
|
| 90 |
|
| 91 |
-
def
|
| 92 |
-
"""
|
| 93 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
| 94 |
thread.start()
|
| 95 |
|
| 96 |
-
|
| 97 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 98 |
|
| 99 |
-
|
| 100 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 101 |
|
| 102 |
-
#
|
| 103 |
@fastapi_app.get("/")
|
| 104 |
def root():
|
| 105 |
"""Root endpoint for Hugging Face health checks"""
|
|
@@ -107,14 +214,12 @@ def root():
|
|
| 107 |
"status": "healthy" if systems_ready else "initializing",
|
| 108 |
"message": "Saem's Tunes AI API is running",
|
| 109 |
"timestamp": datetime.now().isoformat(),
|
| 110 |
-
"version": "
|
| 111 |
-
"
|
| 112 |
-
"initialization_complete": initialization_complete
|
| 113 |
}
|
| 114 |
|
| 115 |
@fastapi_app.get("/api/health")
|
| 116 |
def api_health():
|
| 117 |
-
"""Health check endpoint"""
|
| 118 |
try:
|
| 119 |
status_data = get_system_status()
|
| 120 |
return status_data
|
|
@@ -128,28 +233,47 @@ def api_health():
|
|
| 128 |
@fastapi_app.get("/api/models")
|
| 129 |
def api_models():
|
| 130 |
models_info = {
|
| 131 |
-
"available_models": ["
|
| 132 |
"current_model": Config.MODEL_NAME,
|
|
|
|
|
|
|
| 133 |
"quantization": "Q4_K_M",
|
| 134 |
-
"context_length":
|
| 135 |
-
"parameters": "
|
|
|
|
|
|
|
|
|
|
| 136 |
}
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 137 |
return models_info
|
| 138 |
|
| 139 |
@fastapi_app.get("/api/stats")
|
| 140 |
def api_stats():
|
| 141 |
-
if not monitor:
|
| 142 |
return JSONResponse(
|
| 143 |
-
content={
|
| 144 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
| 145 |
)
|
| 146 |
|
| 147 |
stats_data = {
|
|
|
|
| 148 |
"total_requests": len(monitor.inference_metrics),
|
| 149 |
"average_response_time": monitor.get_average_response_time(),
|
| 150 |
"error_rate": monitor.get_error_rate(),
|
| 151 |
"uptime": monitor.get_uptime(),
|
| 152 |
-
"system_health": get_system_status()
|
|
|
|
| 153 |
}
|
| 154 |
return stats_data
|
| 155 |
|
|
@@ -160,7 +284,10 @@ def api_chat(request: ChatRequest):
|
|
| 160 |
raise HTTPException(status_code=400, detail="Message cannot be empty")
|
| 161 |
|
| 162 |
if not systems_ready:
|
| 163 |
-
raise HTTPException(
|
|
|
|
|
|
|
|
|
|
| 164 |
|
| 165 |
security_result = security_system.check_request(request.message, request.user_id)
|
| 166 |
if security_result["is_suspicious"]:
|
|
@@ -184,94 +311,6 @@ def api_chat(request: ChatRequest):
|
|
| 184 |
logger.error(f"API chat error: {e}")
|
| 185 |
raise HTTPException(status_code=500, detail="Internal server error")
|
| 186 |
|
| 187 |
-
class ChatRequest(BaseModel):
|
| 188 |
-
message: str
|
| 189 |
-
user_id: Optional[str] = "anonymous"
|
| 190 |
-
conversation_id: Optional[str] = None
|
| 191 |
-
|
| 192 |
-
class ChatResponse(BaseModel):
|
| 193 |
-
response: str
|
| 194 |
-
processing_time: float
|
| 195 |
-
conversation_id: str
|
| 196 |
-
timestamp: str
|
| 197 |
-
model_used: str
|
| 198 |
-
|
| 199 |
-
def chat_interface(message: str, history: List[List[str]], request: gr.Request) -> str:
|
| 200 |
-
try:
|
| 201 |
-
if not message.strip():
|
| 202 |
-
return "Please ask me anything about Saem's Tunes!"
|
| 203 |
-
|
| 204 |
-
if not systems_ready:
|
| 205 |
-
return "🔄 System is still initializing. Please wait a moment and try again..."
|
| 206 |
-
|
| 207 |
-
client_host = getattr(request, "client", None)
|
| 208 |
-
if client_host:
|
| 209 |
-
user_ip = client_host.host
|
| 210 |
-
else:
|
| 211 |
-
user_ip = "unknown"
|
| 212 |
-
user_id = f"gradio_user_{user_ip}"
|
| 213 |
-
|
| 214 |
-
security_result = security_system.check_request(message, user_id)
|
| 215 |
-
if security_result["is_suspicious"]:
|
| 216 |
-
logger.warning(f"Suspicious request blocked from {user_ip}: {message}")
|
| 217 |
-
return "Your request has been blocked for security reasons. Please try a different question."
|
| 218 |
-
|
| 219 |
-
start_time = time.time()
|
| 220 |
-
response = ai_system.process_query(message, user_id)
|
| 221 |
-
processing_time = time.time() - start_time
|
| 222 |
-
|
| 223 |
-
formatted_response = f"{response}\n\n_Generated in {processing_time:.1f}s_"
|
| 224 |
-
|
| 225 |
-
logger.info(f"Chat processed: {message[:50]}... -> {processing_time:.2f}s")
|
| 226 |
-
|
| 227 |
-
return formatted_response
|
| 228 |
-
|
| 229 |
-
except Exception as e:
|
| 230 |
-
logger.error(f"Chat error: {e}")
|
| 231 |
-
return "I apologize, but I'm experiencing technical difficulties. Please try again later."
|
| 232 |
-
|
| 233 |
-
def get_system_status() -> Dict[str, Any]:
|
| 234 |
-
if not initialization_complete:
|
| 235 |
-
return {
|
| 236 |
-
"status": "initializing",
|
| 237 |
-
"details": "Systems are starting up...",
|
| 238 |
-
"systems_ready": systems_ready,
|
| 239 |
-
"timestamp": datetime.now().isoformat()
|
| 240 |
-
}
|
| 241 |
-
|
| 242 |
-
if not systems_ready:
|
| 243 |
-
return {
|
| 244 |
-
"status": "degraded",
|
| 245 |
-
"details": "Systems initialized but not ready",
|
| 246 |
-
"systems_ready": systems_ready,
|
| 247 |
-
"timestamp": datetime.now().isoformat()
|
| 248 |
-
}
|
| 249 |
-
|
| 250 |
-
try:
|
| 251 |
-
return {
|
| 252 |
-
"status": "healthy",
|
| 253 |
-
"timestamp": datetime.now().isoformat(),
|
| 254 |
-
"systems": {
|
| 255 |
-
"supabase": supabase_integration.is_connected() if supabase_integration else False,
|
| 256 |
-
"security": True,
|
| 257 |
-
"monitoring": True,
|
| 258 |
-
"ai_system": ai_system.is_healthy() if ai_system else False
|
| 259 |
-
},
|
| 260 |
-
"resources": {
|
| 261 |
-
"cpu_percent": psutil.cpu_percent(),
|
| 262 |
-
"memory_percent": psutil.virtual_memory().percent,
|
| 263 |
-
"disk_percent": psutil.disk_usage('/').percent
|
| 264 |
-
},
|
| 265 |
-
"performance": {
|
| 266 |
-
"total_requests": len(monitor.inference_metrics),
|
| 267 |
-
"avg_response_time": monitor.get_average_response_time(),
|
| 268 |
-
"error_rate": monitor.get_error_rate()
|
| 269 |
-
},
|
| 270 |
-
"systems_ready": systems_ready
|
| 271 |
-
}
|
| 272 |
-
except Exception as e:
|
| 273 |
-
return {"status": "error", "error": str(e)}
|
| 274 |
-
|
| 275 |
def create_gradio_interface():
|
| 276 |
custom_css = """
|
| 277 |
.gradio-container {
|
|
@@ -334,7 +373,7 @@ def create_gradio_interface():
|
|
| 334 |
<div class="header">
|
| 335 |
<h1 style="margin: 0; font-size: 2.2em;">🎵 Saem's Tunes AI Assistant</h1>
|
| 336 |
<p style="margin: 10px 0 0 0; font-size: 1.1em; opacity: 0.9;">
|
| 337 |
-
Powered by
|
| 338 |
</p>
|
| 339 |
</div>
|
| 340 |
""")
|
|
@@ -408,11 +447,11 @@ def create_gradio_interface():
|
|
| 408 |
gr.Markdown("""
|
| 409 |
<div class="footer">
|
| 410 |
<p>
|
| 411 |
-
<strong>Powered by
|
| 412 |
<a href="https://www.saemstunes.com" target="_blank">Saem's Tunes Music Platform</a>
|
| 413 |
</p>
|
| 414 |
<p style="font-size: 0.9em; opacity: 0.7;">
|
| 415 |
-
Model: Q4_K_M quantization • Context:
|
| 416 |
</p>
|
| 417 |
</div>
|
| 418 |
""")
|
|
@@ -431,12 +470,18 @@ def create_gradio_interface():
|
|
| 431 |
<small>
|
| 432 |
Supabase: {'✅' if systems.get('supabase') else '❌'} |
|
| 433 |
AI System: {'✅' if systems.get('ai_system') else '❌'} |
|
|
|
|
| 434 |
CPU: {resources.get('cpu_percent', 0):.1f}% |
|
| 435 |
Memory: {resources.get('memory_percent', 0):.1f}%
|
| 436 |
</small>
|
| 437 |
"""
|
| 438 |
elif status_text == "initializing":
|
| 439 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 440 |
else:
|
| 441 |
html = f"<div class='status-indicator {status_class}'></div>{status.get('details', 'Unknown status')}"
|
| 442 |
|
|
@@ -495,20 +540,15 @@ def create_gradio_interface():
|
|
| 495 |
|
| 496 |
return demo
|
| 497 |
|
| 498 |
-
# Create Gradio interface and mount to FastAPI
|
| 499 |
demo = create_gradio_interface()
|
| 500 |
app = gr.mount_gradio_app(fastapi_app, demo, path="/")
|
| 501 |
|
|
|
|
|
|
|
|
|
|
| 502 |
if __name__ == "__main__":
|
| 503 |
-
logger.info("🎵 Starting Saem's Tunes AI
|
| 504 |
-
|
| 505 |
-
# For local development, wait for initialization
|
| 506 |
-
if not initialization_complete:
|
| 507 |
-
logger.info("⏳ Waiting for system initialization...")
|
| 508 |
-
for i in range(30): # Wait up to 30 seconds
|
| 509 |
-
if initialization_complete:
|
| 510 |
-
break
|
| 511 |
-
time.sleep(1)
|
| 512 |
|
| 513 |
import uvicorn
|
| 514 |
uvicorn.run(
|
|
|
|
| 24 |
class Config:
    """Application settings read from environment variables.

    Each attribute is evaluated once, when this class body runs, and falls
    back to the default given here if the variable is unset. Numeric settings
    are converted eagerly, so a malformed value raises ``ValueError`` at that
    point.
    """

    # Supabase connection settings; empty strings when not configured.
    SUPABASE_URL = os.getenv("SUPABASE_URL", "")
    SUPABASE_ANON_KEY = os.getenv("SUPABASE_ANON_KEY", "")

    # Model selection. NOTE(review): MODEL_REPO / MODEL_FILE look like a
    # Hugging Face repo id and a GGUF file name within it - confirm against
    # SaemsTunesAISystem, which receives all three values.
    MODEL_NAME = os.getenv("MODEL_NAME", "TheBloke/TinyLlama-1.1B-Chat-v1.0-GGUF")
    MODEL_REPO = os.getenv("MODEL_REPO", "TheBloke/TinyLlama-1.1B-Chat-v1.0-GGUF")
    MODEL_FILE = os.getenv("MODEL_FILE", "tinyllama-1.1b-chat-v1.0.Q4_K_M.gguf")

    HF_SPACE = os.getenv("HF_SPACE", "saemstunes/STA-AI")
    # Default is a string, like every other numeric setting, so int() always
    # receives the same type whether or not the variable is set.
    PORT = int(os.getenv("PORT", "8000"))
    LOG_LEVEL = os.getenv("LOG_LEVEL", "INFO")

    # Generation settings forwarded to the AI system.
    MAX_RESPONSE_LENGTH = int(os.getenv("MAX_RESPONSE_LENGTH", "500"))
    TEMPERATURE = float(os.getenv("TEMPERATURE", "0.7"))
    TOP_P = float(os.getenv("TOP_P", "0.9"))
    CONTEXT_WINDOW = int(os.getenv("CONTEXT_WINDOW", "2048"))

    # Only the literal "true" (any letter case) enables monitoring.
    ENABLE_MONITORING = os.getenv("ENABLE_MONITORING", "true").lower() == "true"
|
| 38 |
|
| 39 |
# Resolve the configured level name case-insensitively. If LOG_LEVEL does not
# name a numeric logging level (e.g. a typo), fall back to INFO instead of
# raising AttributeError while the module is being imported.
_configured_level = getattr(logging, Config.LOG_LEVEL.upper(), None)
logging.basicConfig(
    level=_configured_level if isinstance(_configured_level, int) else logging.INFO,
    format='%(asctime)s - %(name)s - %(levelname)s - %(message)s',
    handlers=[logging.StreamHandler()],  # Only StreamHandler for Hugging Face Spaces
)
logger = logging.getLogger(__name__)
|
| 45 |
|
|
|
|
| 46 |
# Subsystem handles; they stay None until initialize_systems() assigns them.
supabase_integration = None
security_system = None
monitor = None
ai_system = None

# systems_ready becomes True only after the AI system passes its health check.
systems_ready = False
# initialization_complete becomes True once initialize_systems() has finished,
# whether it succeeded or failed.
initialization_complete = False
# Human-readable messages describing why initialization did not succeed.
initialization_errors = []
# time.time() value recorded when background initialization was started.
initialization_start_time = None
|
| 54 |
|
| 55 |
def initialize_systems():
|
| 56 |
+
global supabase_integration, security_system, monitor, ai_system, systems_ready, initialization_complete, initialization_errors
|
|
|
|
| 57 |
|
| 58 |
+
logger.info("🚀 Initializing Saem's Tunes AI System...")
|
| 59 |
|
| 60 |
try:
|
| 61 |
supabase_integration = AdvancedSupabaseIntegration(
|
|
|
|
| 67 |
security_system = AdvancedSecuritySystem()
|
| 68 |
logger.info("✅ Security system initialized")
|
| 69 |
|
| 70 |
+
monitor = ComprehensiveMonitor(prometheus_port=8001)
|
| 71 |
logger.info("✅ Monitoring system initialized")
|
| 72 |
|
| 73 |
ai_system = SaemsTunesAISystem(
|
| 74 |
+
supabase_integration=supabase_integration,
|
| 75 |
+
security_system=security_system,
|
| 76 |
+
monitor=monitor,
|
| 77 |
model_name=Config.MODEL_NAME,
|
| 78 |
+
model_repo=Config.MODEL_REPO,
|
| 79 |
+
model_file=Config.MODEL_FILE,
|
| 80 |
+
max_response_length=Config.MAX_RESPONSE_LENGTH,
|
| 81 |
+
temperature=Config.TEMPERATURE,
|
| 82 |
+
top_p=Config.TOP_P,
|
| 83 |
+
context_window=Config.CONTEXT_WINDOW
|
| 84 |
)
|
| 85 |
logger.info("✅ AI system initialized")
|
| 86 |
|
| 87 |
+
if ai_system.is_healthy():
|
| 88 |
+
systems_ready = True
|
| 89 |
+
initialization_complete = True
|
| 90 |
+
logger.info("🎉 All systems initialized successfully!")
|
| 91 |
+
else:
|
| 92 |
+
initialization_errors.append("AI system health check failed")
|
| 93 |
+
initialization_complete = True
|
| 94 |
|
| 95 |
return True
|
| 96 |
|
| 97 |
except Exception as e:
|
| 98 |
+
error_msg = f"System initialization failed: {str(e)}"
|
| 99 |
+
logger.error(error_msg)
|
| 100 |
+
initialization_errors.append(error_msg)
|
| 101 |
initialization_complete = True
|
| 102 |
return False
|
| 103 |
|
| 104 |
+
def initialize_systems_background():
    """Run system initialization in background thread.

    Stores the current time in the module-level ``initialization_start_time``
    and starts ``initialize_systems`` on a daemon thread, then returns without
    waiting for it to finish.
    """
    global initialization_start_time
    initialization_start_time = time.time()

    # Daemon thread: it does not keep the interpreter alive at shutdown.
    worker = threading.Thread(target=initialize_systems, daemon=True)
    worker.start()
|
| 112 |
|
| 113 |
+
def chat_interface(message: str, history: List[List[str]], request: gr.Request) -> str:
    """Answer a single chat message coming from the Gradio UI.

    Args:
        message: Text entered by the user.
        history: Earlier conversation turns passed in by Gradio; not used here.
        request: Gradio request object; only its ``client.host`` is read, to
            build a per-client user id.

    Returns:
        The AI response followed by a "Generated in Ns" footer, or a fixed
        user-facing notice when the message is empty, the systems are not
        ready, the security check flags the request, or any error occurs.
    """
    try:
        # Treat None like blank input so it gets the prompt below rather than
        # raising on .strip() and ending up in the generic error reply.
        if not message or not message.strip():
            return "Please ask me anything about Saem's Tunes!"

        if not systems_ready:
            return "🔄 Systems are still initializing. Please wait a moment and try again..."

        # The request may carry no client information; fall back to "unknown".
        client = getattr(request, "client", None)
        user_ip = client.host if client else "unknown"
        user_id = f"gradio_user_{user_ip}"

        security_result = security_system.check_request(message, user_id)
        if security_result["is_suspicious"]:
            logger.warning(f"Suspicious request blocked from {user_ip}: {message}")
            return "Your request has been blocked for security reasons. Please try a different question."

        start_time = time.time()
        response = ai_system.process_query(message, user_id)
        processing_time = time.time() - start_time

        logger.info(f"Chat processed: {message[:50]}... -> {processing_time:.2f}s")

        return f"{response}\n\n_Generated in {processing_time:.1f}s_"

    except Exception as e:
        # Boundary handler for the UI: log with the traceback so the failure
        # can be diagnosed, but never surface internals to the user.
        logger.exception(f"Chat error: {e}")
        return "I apologize, but I'm experiencing technical difficulties. Please try again later."
|
| 146 |
|
| 147 |
+
def get_system_status() -> Dict[str, Any]:
    """Build a status report for the health endpoints and the UI.

    Returns:
        A dict whose ``"status"`` is one of:

        * ``"initializing"`` - initialization has not finished; includes
          whether it was started and for how many seconds it has been running.
        * ``"degraded"`` - initialization finished but the systems are not
          ready; includes the recorded initialization errors.
        * ``"healthy"`` - includes per-subsystem flags, host resource usage
          from ``psutil`` and request metrics from the monitor.
        * ``"error"`` - collecting the healthy report raised; includes the
          exception text.
    """
    if not initialization_complete:
        # Read the global once so the flag and the duration agree even if the
        # start time is being set concurrently.
        started_at = initialization_start_time
        return {
            "status": "initializing",
            "details": "Systems are starting up...",
            "timestamp": datetime.now().isoformat(),
            "initialization_started": started_at is not None,
            "duration_seconds": time.time() - started_at if started_at is not None else 0,
        }

    if not systems_ready:
        return {
            "status": "degraded",
            "details": "Systems initialized but not fully ready",
            # Copy so callers never hold a reference to the shared list.
            "errors": list(initialization_errors),
            "timestamp": datetime.now().isoformat(),
        }

    try:
        return {
            "status": "healthy",
            "timestamp": datetime.now().isoformat(),
            "systems": {
                "supabase": supabase_integration.is_connected() if supabase_integration else False,
                "security": bool(security_system),
                "monitoring": bool(monitor),
                "ai_system": ai_system.is_healthy() if ai_system else False,
                "model_loaded": ai_system.model_loaded if ai_system else False,
            },
            "resources": {
                "cpu_percent": psutil.cpu_percent(),
                "memory_percent": psutil.virtual_memory().percent,
                "disk_percent": psutil.disk_usage('/').percent,
            },
            "performance": {
                "total_requests": len(monitor.inference_metrics) if monitor else 0,
                "avg_response_time": monitor.get_average_response_time() if monitor else 0,
                "error_rate": monitor.get_error_rate() if monitor else 0,
            },
        }
    except Exception as e:
        # Status reporting must not raise; report the failure as data instead.
        return {
            "status": "error",
            "error": str(e),
            "timestamp": datetime.now().isoformat(),
        }
|
| 193 |
+
|
| 194 |
+
class ChatRequest(BaseModel):
    """Request body accepted by the chat API endpoint."""

    # Text of the user's message.
    message: str
    # Caller-supplied user identifier; defaults to "anonymous".
    user_id: Optional[str] = "anonymous"
    # Optional conversation identifier; None when the caller sends none.
    conversation_id: Optional[str] = None
|
| 198 |
+
|
| 199 |
+
class ChatResponse(BaseModel):
    """Response body schema for the chat API."""

    # Generated answer text.
    response: str
    # NOTE(review): presumably seconds spent producing the answer - confirm
    # against the endpoint that fills this in.
    processing_time: float
    conversation_id: str
    # Timestamp as a string; presumably ISO 8601 like the other endpoints - confirm.
    timestamp: str
    # Name of the model that produced the response.
    model_used: str
|
| 205 |
+
|
| 206 |
+
# Create FastAPI app at module level - REQUIRED FOR HUGGING FACE
fastapi_app = FastAPI(title="Saem's Tunes AI API", version="2.0.0")
|
| 208 |
|
| 209 |
+
# Add root route - REQUIRED FOR HUGGING FACE HEALTH CHECKS
|
| 210 |
@fastapi_app.get("/")
|
| 211 |
def root():
|
| 212 |
"""Root endpoint for Hugging Face health checks"""
|
|
|
|
| 214 |
"status": "healthy" if systems_ready else "initializing",
|
| 215 |
"message": "Saem's Tunes AI API is running",
|
| 216 |
"timestamp": datetime.now().isoformat(),
|
| 217 |
+
"version": "2.0.0",
|
| 218 |
+
"environment": "huggingface-spaces"
|
|
|
|
| 219 |
}
|
| 220 |
|
| 221 |
@fastapi_app.get("/api/health")
|
| 222 |
def api_health():
|
|
|
|
| 223 |
try:
|
| 224 |
status_data = get_system_status()
|
| 225 |
return status_data
|
|
|
|
| 233 |
@fastapi_app.get("/api/models")
def api_models():
    """Describe the configured model and its generation settings.

    Returns:
        A dict of model metadata taken from ``Config``. When the AI system
        exists and the systems are ready, the result of
        ``ai_system.get_model_stats()`` is merged in; a failure to fetch those
        stats is logged and otherwise ignored.
    """
    models_info = {
        "available_models": ["TheBloke/TinyLlama-1.1B-Chat-v1.0-GGUF"],
        "current_model": Config.MODEL_NAME,
        "model_repo": Config.MODEL_REPO,
        "model_file": Config.MODEL_FILE,
        "quantization": "Q4_K_M",
        "context_length": Config.CONTEXT_WINDOW,
        "parameters": "1.1B",
        "max_response_length": Config.MAX_RESPONSE_LENGTH,
        "temperature": Config.TEMPERATURE,
        "top_p": Config.TOP_P,
    }

    if ai_system and systems_ready:
        try:
            # Live stats are optional extras; static info is returned regardless.
            model_stats = ai_system.get_model_stats()
            models_info.update(model_stats)
        except Exception as e:
            logger.warning(f"Could not get model stats: {e}")

    return models_info
|
| 256 |
|
| 257 |
@fastapi_app.get("/api/stats")
def api_stats():
    """Return request statistics collected by the monitor.

    Returns:
        While the monitor is missing or the systems are not ready, a
        ``JSONResponse`` (HTTP 200) carrying a short status payload. Otherwise
        a dict with request count, average response time, error rate, uptime
        and the full system status report.
    """
    if not monitor or not systems_ready:
        return JSONResponse(
            content={
                # Once initialization has finished without the systems
                # becoming ready, report "degraded" rather than claiming the
                # app is still starting - this matches get_system_status().
                "status": "degraded" if initialization_complete else "initializing",
                "systems_ready": systems_ready,
                "timestamp": datetime.now().isoformat(),
            },
            status_code=200,  # Always return 200 for Hugging Face
        )

    stats_data = {
        "status": "healthy",
        "total_requests": len(monitor.inference_metrics),
        "average_response_time": monitor.get_average_response_time(),
        "error_rate": monitor.get_error_rate(),
        "uptime": monitor.get_uptime(),
        "system_health": get_system_status(),
        "timestamp": datetime.now().isoformat(),
    }
    return stats_data
|
| 279 |
|
|
|
|
| 284 |
raise HTTPException(status_code=400, detail="Message cannot be empty")
|
| 285 |
|
| 286 |
if not systems_ready:
|
| 287 |
+
raise HTTPException(
|
| 288 |
+
status_code=503,
|
| 289 |
+
detail="Systems are still initializing. Please try again in a moment."
|
| 290 |
+
)
|
| 291 |
|
| 292 |
security_result = security_system.check_request(request.message, request.user_id)
|
| 293 |
if security_result["is_suspicious"]:
|
|
|
|
| 311 |
logger.error(f"API chat error: {e}")
|
| 312 |
raise HTTPException(status_code=500, detail="Internal server error")
|
| 313 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 314 |
def create_gradio_interface():
|
| 315 |
custom_css = """
|
| 316 |
.gradio-container {
|
|
|
|
| 373 |
<div class="header">
|
| 374 |
<h1 style="margin: 0; font-size: 2.2em;">🎵 Saem's Tunes AI Assistant</h1>
|
| 375 |
<p style="margin: 10px 0 0 0; font-size: 1.1em; opacity: 0.9;">
|
| 376 |
+
Powered by TinyLlama 1.1B • Built for music education and streaming
|
| 377 |
</p>
|
| 378 |
</div>
|
| 379 |
""")
|
|
|
|
| 447 |
gr.Markdown("""
|
| 448 |
<div class="footer">
|
| 449 |
<p>
|
| 450 |
+
<strong>Powered by TinyLlama 1.1B Chat</strong> •
|
| 451 |
<a href="https://www.saemstunes.com" target="_blank">Saem's Tunes Music Platform</a>
|
| 452 |
</p>
|
| 453 |
<p style="font-size: 0.9em; opacity: 0.7;">
|
| 454 |
+
Model: Q4_K_M quantization • Context: 2K tokens • Response time: ~2-5s
|
| 455 |
</p>
|
| 456 |
</div>
|
| 457 |
""")
|
|
|
|
| 470 |
<small>
|
| 471 |
Supabase: {'✅' if systems.get('supabase') else '❌'} |
|
| 472 |
AI System: {'✅' if systems.get('ai_system') else '❌'} |
|
| 473 |
+
Model: {'✅' if systems.get('model_loaded') else '❌'} |
|
| 474 |
CPU: {resources.get('cpu_percent', 0):.1f}% |
|
| 475 |
Memory: {resources.get('memory_percent', 0):.1f}%
|
| 476 |
</small>
|
| 477 |
"""
|
| 478 |
elif status_text == "initializing":
|
| 479 |
+
duration = status.get('duration_seconds', 0)
|
| 480 |
+
html = f"""
|
| 481 |
+
<div class='status-indicator {status_class}'></div>
|
| 482 |
+
<strong>System Status: Initializing</strong><br>
|
| 483 |
+
<small>Started {duration:.0f}s ago • Downloading AI model...</small>
|
| 484 |
+
"""
|
| 485 |
else:
|
| 486 |
html = f"<div class='status-indicator {status_class}'></div>{status.get('details', 'Unknown status')}"
|
| 487 |
|
|
|
|
| 540 |
|
| 541 |
return demo
|
| 542 |
|
| 543 |
+
# Create Gradio interface and mount to FastAPI - AT MODULE LEVEL FOR HUGGING FACE
demo = create_gradio_interface()
app = gr.mount_gradio_app(fastapi_app, demo, path="/")

# Start background initialization
initialize_systems_background()
|
| 549 |
+
|
| 550 |
if __name__ == "__main__":
|
| 551 |
+
logger.info("🎵 Starting Saem's Tunes AI on Hugging Face Spaces...")
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 552 |
|
| 553 |
import uvicorn
|
| 554 |
uvicorn.run(
|