fugthchat committed on
Commit
11528d2
·
verified ·
1 Parent(s): 646045d

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +4 -12
app.py CHANGED
@@ -12,8 +12,7 @@ import threading
12
  logging.basicConfig(level=logging.INFO)
13
 
14
  # --- MODEL MAP ---
15
- # This maps the "light", "medium", "heavy" keys from your frontend
16
- # to the actual model files on Hugging Face.
17
  MODEL_MAP = {
18
  "light": {
19
  "repo_id": "TheBloke/stablelm-zephyr-3b-GGUF",
@@ -31,7 +30,7 @@ MODEL_MAP = {
31
 
32
  # --- GLOBAL CACHE & LOCK ---
33
  llm_cache = {} # Caches loaded models
34
- model_lock = threading.Lock() # Prevents two requests from using the model at once
35
 
36
  app = FastAPI()
37
 
@@ -46,11 +45,6 @@ app.add_middleware(
46
 
47
  # --- Helper Function to Load Model ---
48
  def get_llm_instance(choice: str) -> Llama:
49
- """
50
- Loads a model based on the choice.
51
- Uses hf_hub_download.
52
- Caches the loaded model in memory.
53
- """
54
  if choice not in MODEL_MAP:
55
  logging.error(f"Invalid model choice: {choice}")
56
  return None
@@ -116,14 +110,13 @@ def get_status():
116
  return {
117
  "status": "AI server is online",
118
  "model_loaded": loaded_model,
119
- "models": list(MODEL_MAP.keys()) # <-- This is the CRUCIAL line for your frontend
120
  }
121
 
122
  @app.post("/generate")
123
  async def generate_story(prompt: StoryPrompt):
124
  """
125
  Main generation endpoint.
126
- Uses the thread lock to ensure stability.
127
  """
128
  logging.info("Request received. Waiting to acquire model lock...")
129
  with model_lock:
@@ -134,7 +127,6 @@ async def generate_story(prompt: StoryPrompt):
134
  logging.error(f"Failed to get model for choice: {prompt.model_choice}")
135
  return JSONResponse(status_code=503, content={"error": "The AI model is not available or failed to load."})
136
 
137
- # Format the prompt (Zephyr/ChatML format)
138
  final_prompt = f"""<|user|>
139
  Here is the story so far:
140
  {prompt.story_memory}
@@ -165,4 +157,4 @@ Generate the next part of the story.<|endoftext|>
165
  logging.error(f"An internal error occurred during generation: {e}", exc_info=True)
166
  return JSONResponse(status_code=500, content={"error": "An unexpected error occurred."})
167
  finally:
168
- logging.info("Releasing model lock.")
 
12
  logging.basicConfig(level=logging.INFO)
13
 
14
  # --- MODEL MAP ---
15
+ # Maps the frontend keys to the real model files
 
16
  MODEL_MAP = {
17
  "light": {
18
  "repo_id": "TheBloke/stablelm-zephyr-3b-GGUF",
 
30
 
31
  # --- GLOBAL CACHE & LOCK ---
32
  llm_cache = {} # Caches loaded models
33
+ model_lock = threading.Lock() # Prevents crashes
34
 
35
  app = FastAPI()
36
 
 
45
 
46
  # --- Helper Function to Load Model ---
47
  def get_llm_instance(choice: str) -> Llama:
 
 
 
 
 
48
  if choice not in MODEL_MAP:
49
  logging.error(f"Invalid model choice: {choice}")
50
  return None
 
110
  return {
111
  "status": "AI server is online",
112
  "model_loaded": loaded_model,
113
+ "models": list(MODEL_MAP.keys()) # This is the CRUCIAL line for your frontend
114
  }
115
 
116
  @app.post("/generate")
117
  async def generate_story(prompt: StoryPrompt):
118
  """
119
  Main generation endpoint.
 
120
  """
121
  logging.info("Request received. Waiting to acquire model lock...")
122
  with model_lock:
 
127
  logging.error(f"Failed to get model for choice: {prompt.model_choice}")
128
  return JSONResponse(status_code=503, content={"error": "The AI model is not available or failed to load."})
129
 
 
130
  final_prompt = f"""<|user|>
131
  Here is the story so far:
132
  {prompt.story_memory}
 
157
  logging.error(f"An internal error occurred during generation: {e}", exc_info=True)
158
  return JSONResponse(status_code=500, content={"error": "An unexpected error occurred."})
159
  finally:
160
+ logging.info("Releasing model lock.")