Bogdan Radchenko committed on
Commit
b27ec7c
·
verified ·
1 Parent(s): fbac5bf

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +3 -11
app.py CHANGED
@@ -1,6 +1,6 @@
1
  from fastapi import FastAPI, HTTPException
2
  from pydantic import BaseModel
3
- from transformers import AutoTokenizer, AutoModelForCausalLM, BitsAndBytesConfig
4
  from huggingface_hub import login
5
  import os
6
  import torch
@@ -16,20 +16,12 @@ app = FastAPI(
16
 
17
  model_name = "google/gemma-3-270m"
18
 
19
- bnb_config = BitsAndBytesConfig(
20
- load_in_4bit=True,
21
- bnb_4bit_use_double_quant=True,
22
- bnb_4bit_quant_type="nf4",
23
- bnb_4bit_compute_dtype=torch.bfloat16
24
- )
25
-
26
  tokenizer = AutoTokenizer.from_pretrained(model_name, trust_remote_code=True)
27
  model = AutoModelForCausalLM.from_pretrained(
28
  model_name,
29
- quantization_config=bnb_config,
30
  device_map="auto",
31
  trust_remote_code=True,
32
- torch_dtype=torch.bfloat16
33
  )
34
  model.eval()
35
 
@@ -57,7 +49,7 @@ async def generate_text(request: GenerateRequest):
57
 
58
  return {"generated_text": generated_text}
59
  except Exception as e:
60
- raise HTTPException(status_code=500, detail=f"VexaAI-Lab: Ошибка генерации: {str(e)}")
61
 
62
  @app.get("/")
63
  async def root():
 
1
  from fastapi import FastAPI, HTTPException
2
  from pydantic import BaseModel
3
+ from transformers import AutoTokenizer, AutoModelForCausalLM
4
  from huggingface_hub import login
5
  import os
6
  import torch
 
16
 
17
  model_name = "google/gemma-3-270m"
18
 
 
 
 
 
 
 
 
19
  tokenizer = AutoTokenizer.from_pretrained(model_name, trust_remote_code=True)
20
  model = AutoModelForCausalLM.from_pretrained(
21
  model_name,
 
22
  device_map="auto",
23
  trust_remote_code=True,
24
+ torch_dtype=torch.float16
25
  )
26
  model.eval()
27
 
 
49
 
50
  return {"generated_text": generated_text}
51
  except Exception as e:
52
+ raise HTTPException(status_code=500, detail=f"VexaAI-Lab: HTTP/S error: {str(e)}")
53
 
54
  @app.get("/")
55
  async def root():