MGZON committed on
Commit
75fed13
·
verified ·
1 Parent(s): 26f5dc6

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +22 -5
app.py CHANGED
@@ -2,10 +2,12 @@ import os
2
  import logging
3
  import time
4
  from fastapi import FastAPI, HTTPException
 
5
  from pydantic import BaseModel
6
  from transformers import AutoTokenizer, AutoModelForSeq2SeqLM
7
  from llama_cpp import Llama
8
  import asyncio
 
9
 
10
  # Set up logging
11
  logging.basicConfig(level=logging.INFO)
@@ -31,9 +33,18 @@ models_loaded = False
31
  # Health check endpoint
32
  @app.get("/health")
33
  async def health_check():
 
34
  if not models_loaded:
35
- return {"status": "loading", "message": "Models are still loading"}
36
- return {"status": "healthy"}
 
 
 
 
 
 
 
 
37
 
38
  # Async function to load models
39
  async def load_models():
@@ -69,7 +80,7 @@ async def load_models():
69
  mistral = Llama(
70
  model_path=gguf_path,
71
  n_ctx=2048,
72
- n_threads=4, # قللنا عدد الـ threads عشان نقلل الحمل
73
  n_batch=512,
74
  verbose=True
75
  )
@@ -85,6 +96,7 @@ async def load_models():
85
  # Run model loading in the background
86
  @app.on_event("startup")
87
  async def startup_event():
 
88
  asyncio.create_task(load_models())
89
 
90
  # Define request schema
@@ -95,10 +107,11 @@ class AskRequest(BaseModel):
95
  # Endpoint: /ask
96
  @app.post("/ask")
97
  async def ask(req: AskRequest):
 
98
  if not models_loaded:
 
99
  raise HTTPException(status_code=503, detail="Models are still loading, please try again later")
100
 
101
- logger.info(f"Received question: {req.question}")
102
  q = req.question.strip()
103
  if not q:
104
  logger.error("Empty question received")
@@ -122,4 +135,8 @@ async def ask(req: AskRequest):
122
  return {"model": model_name, "response": answer}
123
  except Exception as e:
124
  logger.error(f"Error processing request: {str(e)}")
125
- raise HTTPException(status_code=500, detail=f"خطأ أثناء معالجة الطلب: {str(e)}")
 
 
 
 
 
2
  import logging
3
  import time
4
  from fastapi import FastAPI, HTTPException
5
+ from fastapi.responses import JSONResponse
6
  from pydantic import BaseModel
7
  from transformers import AutoTokenizer, AutoModelForSeq2SeqLM
8
  from llama_cpp import Llama
9
  import asyncio
10
+ import uvicorn
11
 
12
  # Set up logging
13
  logging.basicConfig(level=logging.INFO)
 
33
# Health check endpoint
@app.get("/health")
async def health_check():
    """Report readiness: 'loading' while models are still being loaded, 'healthy' after.

    Responses carry no-cache / keep-alive headers so polling load balancers
    always see the current state over a reused connection.
    """
    logger.info("Health check endpoint called at %s", time.time())
    no_cache_headers = {"Cache-Control": "no-cache", "Connection": "keep-alive"}
    if models_loaded:
        logger.info("Returning 'healthy' status")
        payload = {"status": "healthy"}
    else:
        logger.info("Returning 'loading' status")
        payload = {"status": "loading", "message": "Models are still loading"}
    return JSONResponse(content=payload, headers=no_cache_headers)
48
 
49
  # Async function to load models
50
  async def load_models():
 
80
  mistral = Llama(
81
  model_path=gguf_path,
82
  n_ctx=2048,
83
+ n_threads=4,
84
  n_batch=512,
85
  verbose=True
86
  )
 
96
# Run model loading in the background
@app.on_event("startup")
async def startup_event():
    """Kick off model loading as a background task so the server starts serving
    (e.g. /health) immediately instead of blocking until the models are ready.
    """
    logger.info("Startup event triggered")
    # Keep a reference to the task on app.state: asyncio only holds a weak
    # reference to tasks, so a fire-and-forget create_task() result can be
    # garbage-collected before the load finishes (see asyncio.create_task docs).
    app.state.model_load_task = asyncio.create_task(load_models())
101
 
102
  # Define request schema
 
107
  # Endpoint: /ask
108
  @app.post("/ask")
109
  async def ask(req: AskRequest):
110
+ logger.info(f"Received ask request: {req.question}")
111
  if not models_loaded:
112
+ logger.error("Models not loaded yet")
113
  raise HTTPException(status_code=503, detail="Models are still loading, please try again later")
114
 
 
115
  q = req.question.strip()
116
  if not q:
117
  logger.error("Empty question received")
 
135
  return {"model": model_name, "response": answer}
136
  except Exception as e:
137
  logger.error(f"Error processing request: {str(e)}")
138
+ raise HTTPException(status_code=500, detail=f"خطأ أثناء معالجة الطلب: {str(e)}")
139
+
140
# Run the app
if __name__ == "__main__":
    # Allow the bind port to be overridden via the PORT env var (standard for
    # container platforms); defaults to the original 8080 when unset.
    uvicorn.run(
        app,
        host="0.0.0.0",
        port=int(os.environ.get("PORT", "8080")),
        log_level="info",
    )