Spaces:

pierreramez
/

chatbot-api

Sleeping

App Files Community

pierreramez committed on Jan 17

Commit

8f7da87

verified ·

1 Parent(s): d1dc38a

updated app.py

Browse files

Files changed (1) hide show

app.py +10 -9

app.py CHANGED Viewed

@@ -119,7 +119,7 @@ class ModelManager:
         # GPU check for 4-bit loading
         if use_4bit and self._device == "cuda":
-            print("🚀 GPU detected: Loading in 4-bit mode")
             try:
                 from transformers import BitsAndBytesConfig
@@ -138,14 +138,14 @@ class ModelManager:
                     torch_dtype=torch.float16,
                 )
             except ImportError:
-                print("⚠️ bitsandbytes not installed. Falling back to standard loading.")
                 base_model = AutoModelForCausalLM.from_pretrained(
                     model_name,
                     device_map="auto",
                     trust_remote_code=True,
                 )
         else:
-            print(f"⚠️ Using {self._device} (No GPU or use_4bit=False). Loading standard model.")
             base_model = AutoModelForCausalLM.from_pretrained(
                 model_name,
                 device_map=self._device,
@@ -162,9 +162,9 @@ class ModelManager:
                     torch_dtype=torch.float16 if self._device == "cuda" else torch.float32
                 )
                 self._current_adapter = adapter_path
-                print(f"✅ Adapter loaded successfully")
             except Exception as e:
-                print(f"⚠️ Could not load adapter: {e}")
                 print("   Using base model without adapter")
                 self._model = base_model
                 self._current_adapter = None
@@ -308,13 +308,13 @@ async def startup_event():
     print("Starting up...")
     model_manager.initialize(
-        # 1. The Base Model (The heavy lifter)
         model_name="meta-llama/Llama-3.2-3B-Instruct",
-        # 2. Adapter (The personalization) - YOUR SPECIFIC REPO
         adapter_path="pierreramez/Llama-3.2-3B-Instruct-bnb-4bit_finetuned",
-        # 3. CPU Optimization (Must be False for free tier)
         use_4bit=False
     )
@@ -487,7 +487,7 @@ async def reload_adapter(request: ReloadAdapterRequest):
     """Hot reload model."""
     try:
         model_manager.initialize(
-            model_name="meta-llama/Llama-3.2-1B-Instruct",
             adapter_path=request.adapter_path,
             use_4bit=False
         )
@@ -498,3 +498,4 @@ async def reload_adapter(request: ReloadAdapterRequest):
 if __name__ == "__main__":
     import uvicorn
     uvicorn.run("app:app", host="0.0.0.0", port=7860, reload=True)

         # GPU check for 4-bit loading
         if use_4bit and self._device == "cuda":
+            print("GPU detected: Loading in 4-bit mode")
             try:
                 from transformers import BitsAndBytesConfig
                     torch_dtype=torch.float16,
                 )
             except ImportError:
+                print("bitsandbytes not installed. Falling back to standard loading.")
                 base_model = AutoModelForCausalLM.from_pretrained(
                     model_name,
                     device_map="auto",
                     trust_remote_code=True,
                 )
         else:
+            print(f"Using {self._device} (No GPU or use_4bit=False). Loading standard model.")
             base_model = AutoModelForCausalLM.from_pretrained(
                 model_name,
                 device_map=self._device,
                     torch_dtype=torch.float16 if self._device == "cuda" else torch.float32
                 )
                 self._current_adapter = adapter_path
+                print(f"Adapter loaded successfully")
             except Exception as e:
+                print(f"Could not load adapter: {e}")
                 print("   Using base model without adapter")
                 self._model = base_model
                 self._current_adapter = None
     print("Starting up...")
     model_manager.initialize(
+        # 1. The Base Model
         model_name="meta-llama/Llama-3.2-3B-Instruct",
+        # 2. Adapter
         adapter_path="pierreramez/Llama-3.2-3B-Instruct-bnb-4bit_finetuned",
+        # 3. CPU Optimization
         use_4bit=False
     )
     """Hot reload model."""
     try:
         model_manager.initialize(
+            model_name="meta-llama/Llama-3.2-3B-Instruct",
             adapter_path=request.adapter_path,
             use_4bit=False
         )
 if __name__ == "__main__":
     import uvicorn
     uvicorn.run("app:app", host="0.0.0.0", port=7860, reload=True)