1MR committed on
Commit
bcca643
·
verified ·
1 Parent(s): eed0b29

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +41 -9
app.py CHANGED
@@ -43,16 +43,26 @@ async def lifespan(app: FastAPI):
43
  global model, tokenizer
44
  logger.info("Loading model and tokenizer...")
45
 
46
- # Replace with your model path/name
47
- model_name = "Qwen/Qwen3-4B" # or local path
48
- # model_name = "your-username/your-fine-tuned-model" # or local path
 
 
 
 
49
 
50
  try:
51
- tokenizer = AutoTokenizer.from_pretrained(model_name)
 
 
 
 
 
 
52
  model = AutoModelForCausalLM.from_pretrained(
53
  model_name,
54
- torch_dtype=torch.float16,
55
- device_map="auto",
56
  trust_remote_code=True
57
  )
58
 
@@ -64,7 +74,26 @@ async def lifespan(app: FastAPI):
64
 
65
  except Exception as e:
66
  logger.error(f"Failed to load model: {e}")
67
- raise e
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
68
 
69
  yield
70
 
@@ -104,9 +133,12 @@ def generate_response(
104
  ) -> tuple[str, Dict[str, int]]:
105
  """Generate response using the loaded model"""
106
 
 
 
 
107
  inputs = tokenizer(prompt, return_tensors="pt", truncation=True, max_length=2048)
108
- input_ids = inputs["input_ids"].to(model.device)
109
- attention_mask = inputs["attention_mask"].to(model.device)
110
 
111
  input_length = input_ids.shape[1]
112
 
 
43
  global model, tokenizer
44
  logger.info("Loading model and tokenizer...")
45
 
46
+ # SOLUTION 1: Use a more compatible model
47
+ # Replace Qwen3-4B with a widely supported model
48
+ model_name = "microsoft/DialoGPT-medium" # Alternative: "gpt2", "microsoft/DialoGPT-small"
49
+
50
+ # SOLUTION 2: If you want to use Qwen models, try these alternatives:
51
+ # model_name = "Qwen/Qwen1.5-0.5B-Chat" # Smaller, more compatible Qwen model
52
+ # model_name = "Qwen/Qwen2-0.5B-Instruct" # Even smaller option
53
 
54
  try:
55
+ # SOLUTION 3: Add trust_remote_code=True and use_fast=False for better compatibility
56
+ tokenizer = AutoTokenizer.from_pretrained(
57
+ model_name,
58
+ trust_remote_code=True,
59
+ use_fast=False # Use slow tokenizer for better compatibility
60
+ )
61
+
62
  model = AutoModelForCausalLM.from_pretrained(
63
  model_name,
64
+ torch_dtype=torch.float16 if torch.cuda.is_available() else torch.float32,
65
+ device_map="auto" if torch.cuda.is_available() else None,
66
  trust_remote_code=True
67
  )
68
 
 
74
 
75
  except Exception as e:
76
  logger.error(f"Failed to load model: {e}")
77
+
78
+ # SOLUTION 4: Fallback to a guaranteed working model
79
+ logger.info("Attempting fallback to GPT-2...")
80
+ try:
81
+ model_name = "gpt2"
82
+ tokenizer = AutoTokenizer.from_pretrained(model_name)
83
+ model = AutoModelForCausalLM.from_pretrained(
84
+ model_name,
85
+ torch_dtype=torch.float16 if torch.cuda.is_available() else torch.float32,
86
+ device_map="auto" if torch.cuda.is_available() else None
87
+ )
88
+
89
+ if tokenizer.pad_token is None:
90
+ tokenizer.pad_token = tokenizer.eos_token
91
+
92
+ logger.info(f"Fallback model loaded successfully: {model_name}")
93
+
94
+ except Exception as fallback_error:
95
+ logger.error(f"Fallback model also failed: {fallback_error}")
96
+ raise fallback_error
97
 
98
  yield
99
 
 
133
  ) -> tuple[str, Dict[str, int]]:
134
  """Generate response using the loaded model"""
135
 
136
+ # Handle device placement more robustly
137
+ device = next(model.parameters()).device
138
+
139
  inputs = tokenizer(prompt, return_tensors="pt", truncation=True, max_length=2048)
140
+ input_ids = inputs["input_ids"].to(device)
141
+ attention_mask = inputs["attention_mask"].to(device)
142
 
143
  input_length = input_ids.shape[1]
144