Shivangguptasih committed on
Commit
ec32a0a
·
verified ·
1 Parent(s): 348ae1d

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +4 -8
app.py CHANGED
@@ -1,7 +1,6 @@
1
  from fastapi import FastAPI, HTTPException
2
  from pydantic import BaseModel
3
  import os
4
- from ctransformers.hub import get_local_dir
5
  from ctransformers import AutoModelForCausalLM, AutoTokenizer
6
 
7
  # --- 1. Initialize FastAPI App ---
@@ -11,18 +10,15 @@ app = FastAPI()
11
  # This will run when the Docker container starts.
12
  print("Loading model and tokenizer... This may take a few minutes.")
13
  try:
14
- # --- CHANGE: Use a quantized, open-access Mistral model ---
15
  model_id = "TheBloke/Mistral-7B-Instruct-v0.2-GGUF"
16
- model_file = "mistral-7b-instruct-v0.2.Q4_K_M.gguf" # A good balance of quality and size
17
 
18
- # Download model if not already present
19
- get_local_dir(model_id, model_file=model_file)
20
-
21
- # Load the model using ctransformers
22
  # Set gpu_layers to a number > 0 to use GPU, 0 for CPU
23
  # A T4 GPU can handle around 30-35 layers
24
  gpu_layers = 30 if os.environ.get("SPACE_GPU") is not None else 0
25
 
 
26
  model = AutoModelForCausalLM.from_pretrained(
27
  model_id,
28
  model_file=model_file,
@@ -92,4 +88,4 @@ def edit_text(input_data: TextInput):
92
 
93
  except Exception as e:
94
  print(f"Error during model generation: {e}")
95
- raise HTTPException(status_code=500, detail=f"An error occurred while generating the response from the model: {e}")
 
1
  from fastapi import FastAPI, HTTPException
2
  from pydantic import BaseModel
3
  import os
 
4
  from ctransformers import AutoModelForCausalLM, AutoTokenizer
5
 
6
  # --- 1. Initialize FastAPI App ---
 
10
  # This will run when the Docker container starts.
11
  print("Loading model and tokenizer... This may take a few minutes.")
12
  try:
13
+ # --- Use a quantized, open-access Mistral model ---
14
  model_id = "TheBloke/Mistral-7B-Instruct-v0.2-GGUF"
15
+ model_file = "mistral-7b-instruct-v0.2.Q4_K_M.gguf"
16
 
 
 
 
 
17
  # Set gpu_layers to a number > 0 to use GPU, 0 for CPU
18
  # A T4 GPU can handle around 30-35 layers
19
  gpu_layers = 30 if os.environ.get("SPACE_GPU") is not None else 0
20
 
21
+ # Load the model. ctransformers will automatically download the model_file if not present.
22
  model = AutoModelForCausalLM.from_pretrained(
23
  model_id,
24
  model_file=model_file,
 
88
 
89
  except Exception as e:
90
  print(f"Error during model generation: {e}")
91
+ raise HTTPException(status_code=500, detail=f"An error occurred while generating the response from the model: {e}")