Bogdan Radchenko committed on
Commit
03eabd1
·
verified ·
1 Parent(s): be65e52

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +1 -3
app.py CHANGED
@@ -16,7 +16,6 @@ app = FastAPI(
16
 
17
  model_name = "google/gemma-3-270m"
18
 
19
- # Quantisation to 4-bit
20
  bnb_config = BitsAndBytesConfig(
21
  load_in_4bit=True,
22
  bnb_4bit_use_double_quant=True,
@@ -24,7 +23,6 @@ bnb_config = BitsAndBytesConfig(
24
  bnb_4bit_compute_dtype=torch.bfloat16
25
  )
26
 
27
- # Model loading.
28
  tokenizer = AutoTokenizer.from_pretrained(model_name, trust_remote_code=True)
29
  model = AutoModelForCausalLM.from_pretrained(
30
  model_name,
@@ -59,7 +57,7 @@ async def generate_text(request: GenerateRequest):
59
 
60
  return {"generated_text": generated_text}
61
  except Exception as e:
62
- raise HTTPException(status_code=500, detail=f"VexaAI-Lab: HTTP/S error: {str(e)}")
63
 
64
  @app.get("/")
65
  async def root():
 
16
 
17
  model_name = "google/gemma-3-270m"
18
 
 
19
  bnb_config = BitsAndBytesConfig(
20
  load_in_4bit=True,
21
  bnb_4bit_use_double_quant=True,
 
23
  bnb_4bit_compute_dtype=torch.bfloat16
24
  )
25
 
 
26
  tokenizer = AutoTokenizer.from_pretrained(model_name, trust_remote_code=True)
27
  model = AutoModelForCausalLM.from_pretrained(
28
  model_name,
 
57
 
58
  return {"generated_text": generated_text}
59
  except Exception as e:
60
+ raise HTTPException(status_code=500, detail=f"VexaAI-Lab: Ошибка генерации: {str(e)}")
61
 
62
  @app.get("/")
63
  async def root():