programci48 committed on
Commit
7c003e4
·
verified ·
1 Parent(s): 620e2b8

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +7 -6
app.py CHANGED
@@ -3,7 +3,6 @@ import torch
3
  from fastapi import FastAPI, Request
4
  from transformers import AutoTokenizer, AutoModelForCausalLM
5
  from peft import PeftModel
6
- from huggingface_hub import login
7
  from typing import Dict, Any
8
 
9
  # Hugging Face token
@@ -11,8 +10,9 @@ HF_TOKEN = os.getenv("HF_TOKEN")
11
  if not HF_TOKEN:
12
  raise ValueError("HF_TOKEN environment variable not set!")
13
 
14
- # Login to Hugging Face Hub
15
- login(token=HF_TOKEN)
 
16
 
17
  # Model IDs
18
  BASE_MODEL_ID = "google/gemma-1.1-2b-it"
@@ -21,10 +21,11 @@ LORA_MODEL_ID = "programci48/heytak-lora-v1"
21
  # Load models with error handling and optimizations
22
  def load_models() -> Dict[str, Any]:
23
  try:
24
- # Load tokenizer
25
  tokenizer = AutoTokenizer.from_pretrained(
26
  BASE_MODEL_ID,
27
- token=HF_TOKEN
 
28
  )
29
 
30
  # Load base model with memory optimization
@@ -34,7 +35,7 @@ def load_models() -> Dict[str, Any]:
34
  device_map="auto",
35
  token=HF_TOKEN,
36
  low_cpu_mem_usage=True,
37
- offload_folder="offload" # For CPU offloading if needed
38
  )
39
 
40
  # Load LoRA adapter
 
3
  from fastapi import FastAPI, Request
4
  from transformers import AutoTokenizer, AutoModelForCausalLM
5
  from peft import PeftModel
 
6
  from typing import Dict, Any
7
 
8
  # Hugging Face token
 
10
  if not HF_TOKEN:
11
  raise ValueError("HF_TOKEN environment variable not set!")
12
 
13
+ # Cache dizinini ayarla (yazma izni olan bir dizin)
14
+ os.environ["HF_HOME"] = "/tmp/huggingface"
15
+ os.environ["TRANSFORMERS_CACHE"] = "/tmp/huggingface"
16
 
17
  # Model IDs
18
  BASE_MODEL_ID = "google/gemma-1.1-2b-it"
 
21
  # Load models with error handling and optimizations
22
  def load_models() -> Dict[str, Any]:
23
  try:
24
+ # Load tokenizer (login işlemi olmadan doğrudan token kullanarak)
25
  tokenizer = AutoTokenizer.from_pretrained(
26
  BASE_MODEL_ID,
27
+ token=HF_TOKEN,
28
+ cache_dir="/tmp/huggingface"
29
  )
30
 
31
  # Load base model with memory optimization
 
35
  device_map="auto",
36
  token=HF_TOKEN,
37
  low_cpu_mem_usage=True,
38
+ cache_dir="/tmp/huggingface"
39
  )
40
 
41
  # Load LoRA adapter