Update app.py
Browse files
app.py
CHANGED
|
@@ -12,8 +12,7 @@ import threading
|
|
| 12 |
logging.basicConfig(level=logging.INFO)
|
| 13 |
|
| 14 |
# --- MODEL MAP ---
|
| 15 |
-
#
|
| 16 |
-
# to the actual model files on Hugging Face.
|
| 17 |
MODEL_MAP = {
|
| 18 |
"light": {
|
| 19 |
"repo_id": "TheBloke/stablelm-zephyr-3b-GGUF",
|
|
@@ -31,7 +30,7 @@ MODEL_MAP = {
|
|
| 31 |
|
| 32 |
# --- GLOBAL CACHE & LOCK ---
|
| 33 |
llm_cache = {} # Caches loaded models
|
| 34 |
-
model_lock = threading.Lock() # Prevents
|
| 35 |
|
| 36 |
app = FastAPI()
|
| 37 |
|
|
@@ -46,11 +45,6 @@ app.add_middleware(
|
|
| 46 |
|
| 47 |
# --- Helper Function to Load Model ---
|
| 48 |
def get_llm_instance(choice: str) -> Llama:
|
| 49 |
-
"""
|
| 50 |
-
Loads a model based on the choice.
|
| 51 |
-
Uses hf_hub_download.
|
| 52 |
-
Caches the loaded model in memory.
|
| 53 |
-
"""
|
| 54 |
if choice not in MODEL_MAP:
|
| 55 |
logging.error(f"Invalid model choice: {choice}")
|
| 56 |
return None
|
|
@@ -116,14 +110,13 @@ def get_status():
|
|
| 116 |
return {
|
| 117 |
"status": "AI server is online",
|
| 118 |
"model_loaded": loaded_model,
|
| 119 |
-
"models": list(MODEL_MAP.keys()) #
|
| 120 |
}
|
| 121 |
|
| 122 |
@app.post("/generate")
|
| 123 |
async def generate_story(prompt: StoryPrompt):
|
| 124 |
"""
|
| 125 |
Main generation endpoint.
|
| 126 |
-
Uses the thread lock to ensure stability.
|
| 127 |
"""
|
| 128 |
logging.info("Request received. Waiting to acquire model lock...")
|
| 129 |
with model_lock:
|
|
@@ -134,7 +127,6 @@ async def generate_story(prompt: StoryPrompt):
|
|
| 134 |
logging.error(f"Failed to get model for choice: {prompt.model_choice}")
|
| 135 |
return JSONResponse(status_code=503, content={"error": "The AI model is not available or failed to load."})
|
| 136 |
|
| 137 |
-
# Format the prompt (Zephyr/ChatML format)
|
| 138 |
final_prompt = f"""<|user|>
|
| 139 |
Here is the story so far:
|
| 140 |
{prompt.story_memory}
|
|
@@ -165,4 +157,4 @@ Generate the next part of the story.<|endoftext|>
|
|
| 165 |
logging.error(f"An internal error occurred during generation: {e}", exc_info=True)
|
| 166 |
return JSONResponse(status_code=500, content={"error": "An unexpected error occurred."})
|
| 167 |
finally:
|
| 168 |
-
logging.info("Releasing model lock.")
|
|
|
|
| 12 |
logging.basicConfig(level=logging.INFO)
|
| 13 |
|
| 14 |
# --- MODEL MAP ---
|
| 15 |
+
# Maps the frontend keys to the real model files
|
|
|
|
| 16 |
MODEL_MAP = {
|
| 17 |
"light": {
|
| 18 |
"repo_id": "TheBloke/stablelm-zephyr-3b-GGUF",
|
|
|
|
| 30 |
|
| 31 |
# --- GLOBAL CACHE & LOCK ---
|
| 32 |
llm_cache = {} # Caches loaded models
|
| 33 |
+
model_lock = threading.Lock() # Serializes model access so only one request uses the model at a time (prevents crashes)
|
| 34 |
|
| 35 |
app = FastAPI()
|
| 36 |
|
|
|
|
| 45 |
|
| 46 |
# --- Helper Function to Load Model ---
|
| 47 |
def get_llm_instance(choice: str) -> Llama:
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 48 |
if choice not in MODEL_MAP:
|
| 49 |
logging.error(f"Invalid model choice: {choice}")
|
| 50 |
return None
|
|
|
|
| 110 |
return {
|
| 111 |
"status": "AI server is online",
|
| 112 |
"model_loaded": loaded_model,
|
| 113 |
+
"models": list(MODEL_MAP.keys()) # Model keys the frontend relies on; do not remove
|
| 114 |
}
|
| 115 |
|
| 116 |
@app.post("/generate")
|
| 117 |
async def generate_story(prompt: StoryPrompt):
|
| 118 |
"""
|
| 119 |
Main generation endpoint.
|
|
|
|
| 120 |
"""
|
| 121 |
logging.info("Request received. Waiting to acquire model lock...")
|
| 122 |
with model_lock:
|
|
|
|
| 127 |
logging.error(f"Failed to get model for choice: {prompt.model_choice}")
|
| 128 |
return JSONResponse(status_code=503, content={"error": "The AI model is not available or failed to load."})
|
| 129 |
|
|
|
|
| 130 |
final_prompt = f"""<|user|>
|
| 131 |
Here is the story so far:
|
| 132 |
{prompt.story_memory}
|
|
|
|
| 157 |
logging.error(f"An internal error occurred during generation: {e}", exc_info=True)
|
| 158 |
return JSONResponse(status_code=500, content={"error": "An unexpected error occurred."})
|
| 159 |
finally:
|
| 160 |
+
logging.info("Releasing model lock.")
|