Juna190825 committed on
Commit
d8d0f11
·
verified ·
1 Parent(s): fe55dbd

Update Dockerfile

Browse files
Files changed (1) hide show
  1. app.py +29 -43
app.py CHANGED
@@ -118,7 +118,6 @@ from transformers import AutoModelForCausalLM, AutoTokenizer
118
  from huggingface_hub import login
119
  import torch
120
  import os
121
- import time # For manual retries
122
 
123
  # Authentication
124
  login(token=os.getenv('HF_TOKEN'))
@@ -128,50 +127,37 @@ MODEL_ID = "meta-llama/Llama-2-7b-chat-hf"
128
  CACHE_DIR = "/cache/models"
129
  DEVICE = "cuda" if torch.cuda.is_available() else "cpu"
130
 
131
def load_model_with_retry(max_retries=3):
    """Load the LLaMA model and tokenizer, preferring the local cache.

    First probes the local cache (no network); on a cache miss, downloads
    with up to ``max_retries`` attempts and exponential backoff.

    Args:
        max_retries: Maximum number of download attempts (default 3).

    Returns:
        (model, tokenizer) tuple, with the model moved to ``DEVICE``.

    Raises:
        RuntimeError: If every download attempt fails; the last underlying
            exception is chained as the cause.
    """
    # One-shot cache probe, outside the retry loop: a cache miss is not a
    # transient failure, so it should not consume a download attempt.
    try:
        print("Attempting to load from local cache...")
        model = AutoModelForCausalLM.from_pretrained(
            MODEL_ID,
            cache_dir=CACHE_DIR,
            local_files_only=True,
        ).to(DEVICE)
        tokenizer = AutoTokenizer.from_pretrained(
            MODEL_ID,
            cache_dir=CACHE_DIR,
            local_files_only=True,
        )
        return model, tokenizer
    except OSError:
        # from_pretrained raises OSError when the files are not cached.
        print("Cache not found, will download...")

    # Download with retry; every iteration is a real download attempt.
    for attempt in range(max_retries):
        try:
            print(f"Downloading model (attempt {attempt + 1})...")
            model = AutoModelForCausalLM.from_pretrained(
                MODEL_ID,
                cache_dir=CACHE_DIR,
            ).to(DEVICE)
            tokenizer = AutoTokenizer.from_pretrained(
                MODEL_ID,
                cache_dir=CACHE_DIR,
            )
            return model, tokenizer
        except Exception as e:
            if attempt == max_retries - 1:
                # Chain the cause so the original traceback survives.
                raise RuntimeError(
                    f"Failed after {max_retries} attempts: {str(e)}"
                ) from e
            # Exponential backoff: 2s, 4s, 8s ... capped at 10s.
            wait_time = min(2 ** (attempt + 1), 10)
            print(f"Attempt {attempt + 1} failed ({str(e)}), retrying in {wait_time}s...")
            time.sleep(wait_time)
172
 
173
  # Load model
174
- model, tokenizer = load_model_with_retry()
175
 
176
  def generate_text(prompt, max_length=200):
177
  inputs = tokenizer(prompt, return_tensors="pt").to(DEVICE)
 
118
  from huggingface_hub import login
119
  import torch
120
  import os
 
121
 
122
  # Authentication
123
  login(token=os.getenv('HF_TOKEN'))
 
127
  CACHE_DIR = "/cache/models"
128
  DEVICE = "cuda" if torch.cuda.is_available() else "cpu"
129
 
130
def load_model():
    """Load model directly, attempting cache first"""
    # Shared kwargs for the cache-only probe of both artifacts.
    cached = dict(cache_dir=CACHE_DIR, local_files_only=True)
    try:
        # Cache-only probe: no network traffic on the happy path.
        print("Attempting to load from cache...")
        model = AutoModelForCausalLM.from_pretrained(MODEL_ID, **cached).to(DEVICE)
        tokenizer = AutoTokenizer.from_pretrained(MODEL_ID, **cached)
    except OSError:
        # from_pretrained signals missing cached files via OSError;
        # fall back to a full download into the same cache directory.
        print("Cache not found, downloading...")
        model = AutoModelForCausalLM.from_pretrained(
            MODEL_ID, cache_dir=CACHE_DIR
        ).to(DEVICE)
        tokenizer = AutoTokenizer.from_pretrained(MODEL_ID, cache_dir=CACHE_DIR)
    return model, tokenizer
 
 
 
 
 
 
 
 
 
 
 
 
 
158
 
159
  # Load model
160
+ model, tokenizer = load_model()
161
 
162
  def generate_text(prompt, max_length=200):
163
  inputs = tokenizer(prompt, return_tensors="pt").to(DEVICE)