Shivangguptasih committed on
Commit
ec32a0a
·
verified ·
1 Parent(s): 348ae1d

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +4 -8
app.py CHANGED
@@ -1,7 +1,6 @@
1
  from fastapi import FastAPI, HTTPException
2
  from pydantic import BaseModel
3
  import os
4
- from ctransformers.hub import get_local_dir
5
  from ctransformers import AutoModelForCausalLM, AutoTokenizer
6
 
7
  # --- 1. Initialize FastAPI App ---
@@ -11,18 +10,15 @@ app = FastAPI()
11
  # This will run when the Docker container starts.
12
  print("Loading model and tokenizer... This may take a few minutes.")
13
  try:
14
- # --- CHANGE: Use a quantized, open-access Mistral model ---
15
  model_id = "TheBloke/Mistral-7B-Instruct-v0.2-GGUF"
16
- model_file = "mistral-7b-instruct-v0.2.Q4_K_M.gguf" # A good balance of quality and size
17
 
18
- # Download model if not already present
19
- get_local_dir(model_id, model_file=model_file)
20
-
21
- # Load the model using ctransformers
22
  # Set gpu_layers to a number > 0 to use GPU, 0 for CPU
23
  # A T4 GPU can handle around 30-35 layers
24
  gpu_layers = 30 if os.environ.get("SPACE_GPU") is not None else 0
25
 
 
26
  model = AutoModelForCausalLM.from_pretrained(
27
  model_id,
28
  model_file=model_file,
@@ -92,4 +88,4 @@ def edit_text(input_data: TextInput):
92
 
93
  except Exception as e:
94
  print(f"Error during model generation: {e}")
95
- raise HTTPException(status_code=500, detail=f"An error occurred while generating the response from the model: {e}")
 
1
  from fastapi import FastAPI, HTTPException
2
  from pydantic import BaseModel
3
  import os
 
4
  from ctransformers import AutoModelForCausalLM, AutoTokenizer
5
 
6
  # --- 1. Initialize FastAPI App ---
 
10
  # This will run when the Docker container starts.
11
  print("Loading model and tokenizer... This may take a few minutes.")
12
  try:
13
+ # --- Use a quantized, open-access Mistral model ---
14
  model_id = "TheBloke/Mistral-7B-Instruct-v0.2-GGUF"
15
+ model_file = "mistral-7b-instruct-v0.2.Q4_K_M.gguf"
16
 
 
 
 
 
17
  # Set gpu_layers to a number > 0 to use GPU, 0 for CPU
18
  # A T4 GPU can handle around 30-35 layers
19
  gpu_layers = 30 if os.environ.get("SPACE_GPU") is not None else 0
20
 
21
+ # Load the model. ctransformers will automatically download the model_file if not present.
22
  model = AutoModelForCausalLM.from_pretrained(
23
  model_id,
24
  model_file=model_file,
 
88
 
89
  except Exception as e:
90
  print(f"Error during model generation: {e}")
91
+ raise HTTPException(status_code=500, detail=f"An error occurred while generating the response from the model: {e}")