programci48 committed
Commit a3fcff1 · verified · 1 Parent(s): 7af1417

Update app.py

Files changed (1):
  1. app.py +3 -13
app.py CHANGED
@@ -4,35 +4,25 @@ from fastapi import FastAPI, Request
 from transformers import AutoTokenizer, AutoModelForCausalLM
 from peft import PeftModel
 
-# 🔐 Read the HF_TOKEN variable
-hf_token = os.getenv("HF_TOKEN")
+hf_token = os.getenv("HF_TOKEN")  # 🔑 Token from the environment variable
 
-# 🔧 Model info
 base_model_id = "google/gemma-1.1-2b-it"
 lora_model_id = "programci48/heytak-lora-v1"
 
-# 📦 Model loading
+# 🔧 Tokenizer and model loading
 tokenizer = AutoTokenizer.from_pretrained(base_model_id, token=hf_token)
-base_model = AutoModelForCausalLM.from_pretrained(
-    base_model_id,
-    torch_dtype=torch.float32,
-    device_map=None,
-    token=hf_token
-)
+base_model = AutoModelForCausalLM.from_pretrained(base_model_id, torch_dtype=torch.float32, device_map=None, token=hf_token)
 model = PeftModel.from_pretrained(base_model, lora_model_id, token=hf_token)
 model.eval()
 
-# 🚀 FastAPI app
 app = FastAPI()
 
 @app.post("/run/predict")
 async def predict(request: Request):
     data = await request.json()
     prompt = data["data"][0]
-
     inputs = tokenizer(prompt, return_tensors="pt")
     with torch.no_grad():
         outputs = model.generate(**inputs, max_new_tokens=100)
     response = tokenizer.decode(outputs[0], skip_special_tokens=True)
-
     return {"data": [response]}