Spaces:
Runtime error
Runtime error
Update app.py
Browse files
app.py
CHANGED
|
@@ -1,7 +1,6 @@
|
|
| 1 |
from fastapi import FastAPI, HTTPException
|
| 2 |
from pydantic import BaseModel
|
| 3 |
import os
|
| 4 |
-
from ctransformers.hub import get_local_dir
|
| 5 |
from ctransformers import AutoModelForCausalLM, AutoTokenizer
|
| 6 |
|
| 7 |
# --- 1. Initialize FastAPI App ---
|
|
@@ -11,18 +10,15 @@ app = FastAPI()
|
|
| 11 |
# This will run when the Docker container starts.
|
| 12 |
print("Loading model and tokenizer... This may take a few minutes.")
|
| 13 |
try:
|
| 14 |
-
# ---
|
| 15 |
model_id = "TheBloke/Mistral-7B-Instruct-v0.2-GGUF"
|
| 16 |
-
model_file = "mistral-7b-instruct-v0.2.Q4_K_M.gguf"
|
| 17 |
|
| 18 |
-
# Download model if not already present
|
| 19 |
-
get_local_dir(model_id, model_file=model_file)
|
| 20 |
-
|
| 21 |
-
# Load the model using ctransformers
|
| 22 |
# Set gpu_layers to a number > 0 to use GPU, 0 for CPU
|
| 23 |
# A T4 GPU can handle around 30-35 layers
|
| 24 |
gpu_layers = 30 if os.environ.get("SPACE_GPU") is not None else 0
|
| 25 |
|
|
|
|
| 26 |
model = AutoModelForCausalLM.from_pretrained(
|
| 27 |
model_id,
|
| 28 |
model_file=model_file,
|
|
@@ -92,4 +88,4 @@ def edit_text(input_data: TextInput):
|
|
| 92 |
|
| 93 |
except Exception as e:
|
| 94 |
print(f"Error during model generation: {e}")
|
| 95 |
-
raise HTTPException(status_code=500, detail=f"An error occurred while generating the response from the model: {e}")
|
|
|
|
| 1 |
from fastapi import FastAPI, HTTPException
|
| 2 |
from pydantic import BaseModel
|
| 3 |
import os
|
|
|
|
| 4 |
from ctransformers import AutoModelForCausalLM, AutoTokenizer
|
| 5 |
|
| 6 |
# --- 1. Initialize FastAPI App ---
|
|
|
|
| 10 |
# This will run when the Docker container starts.
|
| 11 |
print("Loading model and tokenizer... This may take a few minutes.")
|
| 12 |
try:
|
| 13 |
+
# --- Use a quantized, open-access Mistral model ---
|
| 14 |
model_id = "TheBloke/Mistral-7B-Instruct-v0.2-GGUF"
|
| 15 |
+
model_file = "mistral-7b-instruct-v0.2.Q4_K_M.gguf"
|
| 16 |
|
|
|
|
|
|
|
|
|
|
|
|
|
| 17 |
# Set gpu_layers to a number > 0 to use GPU, 0 for CPU
|
| 18 |
# A T4 GPU can handle around 30-35 layers
|
| 19 |
gpu_layers = 30 if os.environ.get("SPACE_GPU") is not None else 0
|
| 20 |
|
| 21 |
+
# Load the model. Given a Hub repo id, ctransformers downloads model_file automatically if it is not already cached locally.
|
| 22 |
model = AutoModelForCausalLM.from_pretrained(
|
| 23 |
model_id,
|
| 24 |
model_file=model_file,
|
|
|
|
| 88 |
|
| 89 |
except Exception as e:
|
| 90 |
print(f"Error during model generation: {e}")
|
| 91 |
+
raise HTTPException(status_code=500, detail=f"An error occurred while generating the response from the model: {e}")
|