Update app.py
app.py
CHANGED
@@ -43,16 +43,26 @@ async def lifespan(app: FastAPI):
     global model, tokenizer
     logger.info("Loading model and tokenizer...")
 
-    #
-
-
+    # SOLUTION 1: Use a more compatible model
+    # Replace Qwen3-4B with a widely supported model
+    model_name = "microsoft/DialoGPT-medium"  # Alternative: "gpt2", "microsoft/DialoGPT-small"
+
+    # SOLUTION 2: If you want to use Qwen models, try these alternatives:
+    # model_name = "Qwen/Qwen1.5-0.5B-Chat"  # Smaller, more compatible Qwen model
+    # model_name = "Qwen/Qwen2-0.5B-Instruct"  # Even smaller option
 
     try:
-
+        # SOLUTION 3: Add trust_remote_code=True and use_fast=False for better compatibility
+        tokenizer = AutoTokenizer.from_pretrained(
+            model_name,
+            trust_remote_code=True,
+            use_fast=False  # Use slow tokenizer for better compatibility
+        )
+
         model = AutoModelForCausalLM.from_pretrained(
             model_name,
-            torch_dtype=torch.float16,
-            device_map="auto",
+            torch_dtype=torch.float16 if torch.cuda.is_available() else torch.float32,
+            device_map="auto" if torch.cuda.is_available() else None,
            trust_remote_code=True
         )
 
@@ -64,7 +74,26 @@ async def lifespan(app: FastAPI):
 
     except Exception as e:
         logger.error(f"Failed to load model: {e}")
-
+
+        # SOLUTION 4: Fallback to a guaranteed working model
+        logger.info("Attempting fallback to GPT-2...")
+        try:
+            model_name = "gpt2"
+            tokenizer = AutoTokenizer.from_pretrained(model_name)
+            model = AutoModelForCausalLM.from_pretrained(
+                model_name,
+                torch_dtype=torch.float16 if torch.cuda.is_available() else torch.float32,
+                device_map="auto" if torch.cuda.is_available() else None
+            )
+
+            if tokenizer.pad_token is None:
+                tokenizer.pad_token = tokenizer.eos_token
+
+            logger.info(f"Fallback model loaded successfully: {model_name}")
+
+        except Exception as fallback_error:
+            logger.error(f"Fallback model also failed: {fallback_error}")
+            raise fallback_error
 
     yield
 
@@ -104,9 +133,12 @@ def generate_response(
 ) -> tuple[str, Dict[str, int]]:
     """Generate response using the loaded model"""
 
+    # Handle device placement more robustly
+    device = next(model.parameters()).device
+
     inputs = tokenizer(prompt, return_tensors="pt", truncation=True, max_length=2048)
-    input_ids = inputs["input_ids"].to(
-    attention_mask = inputs["attention_mask"].to(
+    input_ids = inputs["input_ids"].to(device)
+    attention_mask = inputs["attention_mask"].to(device)
 
     input_length = input_ids.shape[1]
 
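
The last hunk ends before the generation call itself. A rough sketch of how generate_response could continue from the device-aware tensors above: this is not part of the commit, and the helper name finish_generation, the sampling settings, and the usage-dict keys are assumptions chosen only to match the tuple[str, Dict[str, int]] return type.

import torch


def finish_generation(model, tokenizer, input_ids, attention_mask, input_length,
                      max_new_tokens=256):
    """Sketch only: decode a reply and report token counts (names are assumed)."""
    with torch.no_grad():
        output_ids = model.generate(
            input_ids=input_ids,
            attention_mask=attention_mask,
            max_new_tokens=max_new_tokens,
            do_sample=True,
            temperature=0.7,
            pad_token_id=tokenizer.eos_token_id,  # GPT-2-family tokenizers define no pad token
        )

    # Keep only the newly generated tokens, not the echoed prompt.
    generated_ids = output_ids[0][input_length:]
    text = tokenizer.decode(generated_ids, skip_special_tokens=True)

    usage = {
        "prompt_tokens": input_length,
        "completion_tokens": int(generated_ids.shape[0]),
        "total_tokens": input_length + int(generated_ids.shape[0]),
    }
    return text, usage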
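
The loading changes (SOLUTION 1-4) are easiest to verify locally before committing to the Space. Below is a minimal smoke test assuming transformers and torch are installed; load_model_with_fallback is a hypothetical helper that mirrors the try/except pattern in the lifespan hook, not code taken from app.py.

import torch
from transformers import AutoModelForCausalLM, AutoTokenizer


def load_model_with_fallback(primary="microsoft/DialoGPT-medium", fallback="gpt2"):
    """Sketch of the commit's load-then-fallback pattern as a standalone helper."""
    for name in (primary, fallback):
        try:
            tokenizer = AutoTokenizer.from_pretrained(name, trust_remote_code=True, use_fast=False)
            model = AutoModelForCausalLM.from_pretrained(
                name,
                torch_dtype=torch.float16 if torch.cuda.is_available() else torch.float32,
                device_map="auto" if torch.cuda.is_available() else None,
                trust_remote_code=True,
            )
            if tokenizer.pad_token is None:
                tokenizer.pad_token = tokenizer.eos_token  # GPT-2-style tokenizers ship without one
            return model, tokenizer, name
        except Exception as exc:
            print(f"Loading {name} failed: {exc}")
    raise RuntimeError("Neither the primary nor the fallback model could be loaded")


if __name__ == "__main__":
    model, tokenizer, name = load_model_with_fallback()
    print(f"Loaded {name} on {next(model.parameters()).device}")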