programci48 committed on
Commit d98e8f9 · verified · 1 Parent(s): 8a769a3

Update app.py

Files changed (1)
  1. app.py +37 -49
app.py CHANGED
@@ -3,66 +3,55 @@ import torch
  from fastapi import FastAPI, Request
  from transformers import AutoTokenizer, AutoModelForCausalLM
  from peft import PeftModel
- from typing import Dict, Any
  import logging

  # Logging setup
  logging.basicConfig(level=logging.INFO)
  logger = logging.getLogger(__name__)

- # Environment variables and configuration
- HF_TOKEN = os.getenv("HF_TOKEN")
+ # HF Spaces provides HF_TOKEN automatically
+ HF_TOKEN = os.getenv("HF_TOKEN", None)
  if not HF_TOKEN:
-     logger.error("HF_TOKEN environment variable not set!")
-     raise ValueError("HF_TOKEN environment variable not set!")
+     logger.warning("HF_TOKEN not found! Falling back to public models")

- # Model configuration
+ # Model configuration (optimized for HF Spaces)
  MODEL_CONFIG = {
      "base_model": "google/gemma-1.1-2b-it",
      "lora_model": "programci48/heytak-lora-v1",
      "cache_dir": "/tmp/huggingface",
+     "offload_folder": "/tmp/offload",
      "device": "cuda" if torch.cuda.is_available() else "cpu",
-     "torch_dtype": torch.float16 if torch.cuda.is_available() else torch.float32,
-     "offload_folder": "/tmp/offload"  # New directory for offloading
+     "torch_dtype": torch.float16 if torch.cuda.is_available() else torch.float32
  }

- def load_models() -> Dict[str, Any]:
-     """Loads the models"""
+ def load_models():
+     """Model loading optimized for HF Spaces"""
      try:
-         # Create the offload directory
-         os.makedirs(MODEL_CONFIG["offload_folder"], exist_ok=True)
-
-         logger.info("Loading tokenizer...")
+         # Tokenizer
          tokenizer = AutoTokenizer.from_pretrained(
              MODEL_CONFIG["base_model"],
              token=HF_TOKEN,
              cache_dir=MODEL_CONFIG["cache_dir"]
          )

-         logger.info(f"Loading base model ({MODEL_CONFIG['device']})...")
+         # Model
          base_model = AutoModelForCausalLM.from_pretrained(
              MODEL_CONFIG["base_model"],
              torch_dtype=MODEL_CONFIG["torch_dtype"],
              device_map="auto" if MODEL_CONFIG["device"] == "cuda" else None,
              token=HF_TOKEN,
-             low_cpu_mem_usage=True,
              cache_dir=MODEL_CONFIG["cache_dir"],
              offload_folder=MODEL_CONFIG["offload_folder"]
          )

-         logger.info("Loading LoRA adapter...")
+         # LoRA adapter
          model = PeftModel.from_pretrained(
              base_model,
              MODEL_CONFIG["lora_model"],
              token=HF_TOKEN
          )
          model.eval()
-
-         if MODEL_CONFIG["device"] == "cpu":
-             model = model.to("cpu")
-             torch.cuda.empty_cache()

-         logger.info("Models loaded successfully!")
          return {"tokenizer": tokenizer, "model": model}

      except Exception as e:
@@ -70,54 +59,53 @@ def load_models() -> Dict[str, Any]:
          raise

  # App startup
- try:
-     models = load_models()
-     app = FastAPI(title="Gemma-LoRA API", version="1.0")
- except Exception as e:
-     logger.critical(f"Application failed to start: {str(e)}")
-     raise
-
- # API endpoints
- @app.post("/run/predict")
+ app = FastAPI(title="HeyTak AI API")
+
+ @app.on_event("startup")
+ async def startup_event():
+     try:
+         app.state.models = load_models()
+         logger.info("Models loaded successfully!")
+     except Exception as e:
+         logger.critical(f"Startup error: {str(e)}")
+         raise
+
+ @app.post("/predict")
  async def predict(request: Request):
      try:
          data = await request.json()
-         prompt = data["data"][0]
-         logger.info(f"Incoming request: {prompt[:50]}...")
-
-         inputs = models["tokenizer"](
-             prompt,
+         prompt = data.get("inputs", "")
+
+         inputs = app.state.models["tokenizer"](
+             prompt,
              return_tensors="pt",
              truncation=True,
              max_length=512
-         ).to(models["model"].device)
+         ).to(app.state.models["model"].device)

          with torch.no_grad():
-             outputs = models["model"].generate(
+             outputs = app.state.models["model"].generate(
                  **inputs,
                  max_new_tokens=100,
-                 do_sample=True,
                  temperature=0.7,
-                 top_p=0.9,
-                 repetition_penalty=1.1
+                 top_p=0.9
              )

-         response = models["tokenizer"].decode(
-             outputs[0],
+         response = app.state.models["tokenizer"].decode(
+             outputs[0],
              skip_special_tokens=True
          ).strip()

-         logger.info(f"Generated response: {response[:50]}...")
-         return {"data": [response]}
+         return {"generated_text": response}

      except Exception as e:
-         logger.error(f"Processing error: {str(e)}")
+         logger.error(f"Prediction error: {str(e)}")
          return {"error": str(e)}, 500

- @app.get("/health")
+ @app.get("/")
  async def health_check():
      return {
-         "status": "healthy",
+         "status": "active",
          "device": MODEL_CONFIG["device"],
-         "torch_dtype": str(MODEL_CONFIG["torch_dtype"])
+         "framework": "FastAPI"
      }
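A note on the new generate() call: in transformers, temperature and top_p only take effect when do_sample=True, a flag the old code passed and this commit drops, so the endpoint now decodes greedily and recent transformers releases warn about the unused sampling parameters. If sampling was meant to stay on, the call would look like this sketch (same parameters as the diff, flag restored):

with torch.no_grad():
    outputs = app.state.models["model"].generate(
        **inputs,
        max_new_tokens=100,
        do_sample=True,   # without this flag, temperature/top_p are ignored
        temperature=0.7,
        top_p=0.9
    )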
 
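For a quick smoke test after the Space restarts, here is a minimal client sketch against the new routes; the base URL is a placeholder assumption, not something recorded in this commit:

import requests

BASE_URL = "https://<your-space>.hf.space"  # placeholder; use the real Space URL

# Health check moved from /health to the root route
print(requests.get(f"{BASE_URL}/").json())

# Predict moved from /run/predict to /predict and now expects
# {"inputs": "..."} instead of the old {"data": ["..."]} payload
resp = requests.post(f"{BASE_URL}/predict", json={"inputs": "Merhaba!"})
print(resp.json())  # expected shape: {"generated_text": "..."}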