programci48 committed
Commit bd4f63a · verified · 1 Parent(s): a3fcff1

Update app.py

Files changed (1)
  1. app.py +16 -3
app.py CHANGED
@@ -4,25 +4,38 @@ from fastapi import FastAPI, Request
 from transformers import AutoTokenizer, AutoModelForCausalLM
 from peft import PeftModel
 
-hf_token = os.getenv("HF_TOKEN")  # 🔑 Token from the environment variable
+# Hugging Face token (required for gated models)
+hf_token = os.getenv("HF_TOKEN")
 
+# Model IDs
 base_model_id = "google/gemma-1.1-2b-it"
 lora_model_id = "programci48/heytak-lora-v1"
 
-# 🔧 Load the tokenizer and model
+# Load the tokenizer and model
 tokenizer = AutoTokenizer.from_pretrained(base_model_id, token=hf_token)
-base_model = AutoModelForCausalLM.from_pretrained(base_model_id, torch_dtype=torch.float32, device_map=None, token=hf_token)
+
+base_model = AutoModelForCausalLM.from_pretrained(
+    base_model_id,
+    torch_dtype=torch.float32,
+    device_map=None,  # no GPU mapping in the Hugging Face CPU environment
+    token=hf_token
+)
+
 model = PeftModel.from_pretrained(base_model, lora_model_id, token=hf_token)
 model.eval()
 
+# FastAPI application
 app = FastAPI()
 
 @app.post("/run/predict")
 async def predict(request: Request):
     data = await request.json()
     prompt = data["data"][0]
+
+    # Generate a response with the model
     inputs = tokenizer(prompt, return_tensors="pt")
     with torch.no_grad():
         outputs = model.generate(**inputs, max_new_tokens=100)
     response = tokenizer.decode(outputs[0], skip_special_tokens=True)
+
     return {"data": [response]}
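A behavioral note on the unchanged handler: for causal LMs, model.generate() returns the prompt tokens followed by the continuation, so tokenizer.decode(outputs[0], ...) echoes the prompt at the start of the response. A minimal sketch of returning only the new text, assuming the same variable names as in the diff:

    # Inside predict(), after generate(): decode only the newly generated tokens.
    prompt_len = inputs["input_ids"].shape[-1]  # number of prompt tokens
    response = tokenizer.decode(outputs[0][prompt_len:], skip_special_tokens=True)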
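For completeness, a minimal client sketch against the /run/predict endpoint this file exposes. The base URL is an assumption (the app served locally); substitute the deployed Space endpoint:

    import requests

    BASE_URL = "http://localhost:7860"  # assumption: replace with the deployed Space URL

    # The endpoint expects a Gradio-style payload: {"data": [prompt]}
    payload = {"data": ["Hello! Briefly introduce yourself."]}

    resp = requests.post(f"{BASE_URL}/run/predict", json=payload, timeout=120)
    resp.raise_for_status()

    # The response mirrors the same schema: {"data": [generated_text]}
    print(resp.json()["data"][0])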