Ojochegbeng committed
Commit 3d61fba · verified · 1 Parent(s): 58cc235

Update app.py

Files changed (1)
  1. app.py +36 -64
app.py CHANGED
@@ -49,28 +49,23 @@ def load_model():
         with torch.no_grad():
             test_output = model(**test_input)
             logger.info(f"Model test successful. Output shape: {test_output.last_hidden_state.shape}")
+            logger.info(f"Model config hidden size: {model.config.hidden_size}")
 
         logger.info("Qwen3-Embedding-0.6B model loaded successfully")
         return True
 
     except Exception as e:
         logger.error(f"Error loading Qwen3 model: {str(e)}")
-        # Try fallback to a simpler approach
-        try:
-            logger.info("Trying fallback model loading...")
-            from sentence_transformers import SentenceTransformer
-            model = SentenceTransformer('all-MiniLM-L6-v2')
-            tokenizer = None
-            logger.info("Fallback model loaded successfully")
-            return True
-        except Exception as fallback_error:
-            logger.error(f"Fallback model loading also failed: {str(fallback_error)}")
+        logger.error("No fallback available - Qwen3 model is required")
         return False
 
 def generate_embeddings(texts: Union[str, List[str]]) -> Union[List[float], List[List[float]]]:
     """Generate embeddings for input text(s) using Qwen3-Embedding-0.6B model"""
     global model, tokenizer
 
+    if not model or not tokenizer:
+        raise Exception("Qwen3 model not loaded. Please ensure the model is properly loaded.")
+
     try:
         # Ensure texts is a list
         if isinstance(texts, str):
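Reviewer note: with the sentence-transformers fallback removed, load_model() now only succeeds when the Qwen3 checkpoint itself loads, and generate_embeddings() refuses to run without both model and tokenizer. The loading code itself sits outside this hunk; below is only a minimal sketch of the assumed setup (the checkpoint id and the MODEL_NAME/DEVICE values mirror constants referenced elsewhere in app.py and are assumptions, not taken from this diff):

# Sketch only - the real load_model() body is not part of this diff.
import torch
from transformers import AutoModel, AutoTokenizer

MODEL_NAME = "Qwen/Qwen3-Embedding-0.6B"   # assumed checkpoint id, per the log messages
DEVICE = "cuda" if torch.cuda.is_available() else "cpu"

tokenizer = AutoTokenizer.from_pretrained(MODEL_NAME)
model = AutoModel.from_pretrained(MODEL_NAME).to(DEVICE)
model.eval()  # inference only; app.py wraps forward passes in torch.no_grad()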
@@ -86,9 +81,7 @@ def generate_embeddings(texts: Union[str, List[str]]) -> Union[List[float], List[List[float]]]:
 
         for text in texts:
             try:
-                # Method 1: Try using the Qwen3 embedding model directly
-                if model and tokenizer and hasattr(model, 'forward'):
-                    # This is the Qwen3 embedding model
+                # Use the Qwen3 embedding model directly
                 inputs = tokenizer(
                     text,
                     return_tensors="pt",
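Reviewer note: the remaining tokenizer arguments fall outside this hunk and are not shown here. For context, a call of this shape returns a batch of tensors, and it is the attention_mask entry that the mean pooling later in this function relies on. The padding/truncation arguments below are an illustration, not taken from app.py:

inputs = tokenizer(
    "example text",
    return_tensors="pt",
    truncation=True,          # assumed; the actual kwargs continue past this hunk
    max_length=MAX_LENGTH,    # app-level constant whose value is not shown in this diff
)
# inputs["input_ids"] and inputs["attention_mask"] are tensors of shape (1, seq_len);
# the attention_mask marks real tokens (1) vs padding (0) for the pooling step.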
@@ -99,60 +92,36 @@ def generate_embeddings(texts: Union[str, List[str]]) -> Union[List[float], List[List[float]]]:
 
                 with torch.no_grad():
                     outputs = model(**inputs)
-
-                    # For Qwen3 embedding models, use the last_hidden_state with mean pooling
-                    if hasattr(outputs, 'last_hidden_state'):
-                        # Mean pooling over the sequence length dimension
-                        attention_mask = inputs.get('attention_mask', None)
-                        if attention_mask is not None:
-                            # Apply attention mask for proper mean pooling
-                            token_embeddings = outputs.last_hidden_state
-                            input_mask_expanded = attention_mask.unsqueeze(-1).expand(token_embeddings.size()).float()
-                            sum_embeddings = torch.sum(token_embeddings * input_mask_expanded, 1)
-                            sum_mask = torch.clamp(input_mask_expanded.sum(1), min=1e-9)
-                            embedding = (sum_embeddings / sum_mask).squeeze().cpu().numpy()
-                        else:
-                            # Simple mean pooling without attention mask
-                            embedding = outputs.last_hidden_state.mean(dim=1).squeeze().cpu().numpy()
+
+                # For Qwen3 embedding models, use the last_hidden_state with mean pooling
+                if hasattr(outputs, 'last_hidden_state'):
+                    # Mean pooling over the sequence length dimension
+                    attention_mask = inputs.get('attention_mask', None)
+                    if attention_mask is not None:
+                        # Apply attention mask for proper mean pooling
+                        token_embeddings = outputs.last_hidden_state
+                        input_mask_expanded = attention_mask.unsqueeze(-1).expand(token_embeddings.size()).float()
+                        sum_embeddings = torch.sum(token_embeddings * input_mask_expanded, 1)
+                        sum_mask = torch.clamp(input_mask_expanded.sum(1), min=1e-9)
+                        embedding = (sum_embeddings / sum_mask).squeeze().cpu().numpy()
                     else:
-                        # Fallback to pooled output if available
-                        embedding = outputs.pooler_output.squeeze().cpu().numpy()
-
+                        # Simple mean pooling without attention mask
+                        embedding = outputs.last_hidden_state.mean(dim=1).squeeze().cpu().numpy()
+                else:
+                    # Fallback to pooled output if available
+                    embedding = outputs.pooler_output.squeeze().cpu().numpy()
+
                 embeddings.append(embedding.tolist())
 
-                elif model and hasattr(model, 'encode'):
-                    # Method 2: Using sentence transformer fallback
-                    embedding = model.encode(text)
-                    embeddings.append(embedding.tolist())
-                else:
-                    raise Exception("No model available")
-
             except Exception as e:
-                logger.warning(f"Error generating embedding for text: {str(e)}")
-                # Return zero vector as last resort - use correct dimension based on model type
-                if hasattr(model, 'config') and hasattr(model.config, 'hidden_size'):
-                    # Qwen3 model dimension
-                    embeddings.append([0.0] * model.config.hidden_size)
-                else:
-                    # Fallback model dimension (384 for all-MiniLM-L6-v2)
-                    embeddings.append([0.0] * 384)
+                logger.error(f"Error generating embedding for text: {str(e)}")
+                raise Exception(f"Failed to generate embedding: {str(e)}")
 
         return embeddings[0] if single_text else embeddings
 
     except Exception as e:
         logger.error(f"Error in generate_embeddings: {str(e)}")
-        # Return zero vectors as fallback - use correct dimension
-        if hasattr(model, 'config') and hasattr(model.config, 'hidden_size'):
-            # Qwen3 model dimension
-            fallback_dim = model.config.hidden_size
-        else:
-            # Fallback model dimension (384 for all-MiniLM-L6-v2)
-            fallback_dim = 384
-
-        if single_text:
-            return [0.0] * fallback_dim
-        else:
-            return [[0.0] * fallback_dim] * len(texts)
+        raise Exception(f"Embedding generation failed: {str(e)}")
 
 def compute_similarity(embedding1: List[float], embedding2: List[float]) -> float:
     """Compute cosine similarity between two embeddings"""
@@ -230,23 +199,26 @@ def similarity_interface(embedding1: str, embedding2: str) -> float:
 def health_check():
     """Health check endpoint"""
     model_info = {
-        "status": "healthy" if model is not None else "unhealthy",
-        "model_loaded": model is not None,
+        "status": "healthy" if model is not None and tokenizer is not None else "unhealthy",
+        "model_loaded": model is not None and tokenizer is not None,
         "model_name": MODEL_NAME,
         "device": DEVICE,
         "max_length": MAX_LENGTH
     }
 
-    if model is not None:
+    if model is not None and tokenizer is not None:
         if hasattr(model, 'config'):
             model_info["model_type"] = "Qwen3-Embedding"
             model_info["embedding_dimension"] = getattr(model.config, 'hidden_size', 1024)
-        elif hasattr(model, 'encode'):
-            model_info["model_type"] = "SentenceTransformer-Fallback"
-            model_info["embedding_dimension"] = 384
+            model_info["tokenizer_loaded"] = True
         else:
             model_info["model_type"] = "Unknown"
             model_info["embedding_dimension"] = "Unknown"
+            model_info["tokenizer_loaded"] = False
+    else:
+        model_info["model_type"] = "Not Loaded"
+        model_info["embedding_dimension"] = "N/A"
+        model_info["tokenizer_loaded"] = tokenizer is not None
 
     return model_info
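Reviewer note: after this change the error paths raise instead of silently returning zero vectors, so callers have to handle exceptions rather than check for degenerate embeddings, and health_check() now reports tokenizer state as well. A minimal usage sketch, assuming it runs in the context of app.py where generate_embeddings and compute_similarity are defined:

try:
    emb1 = generate_embeddings("What is the capital of France?")
    emb2 = generate_embeddings("Paris is the capital of France.")
    score = compute_similarity(emb1, emb2)   # cosine similarity, per the docstring
    print(f"similarity: {score:.4f}")
except Exception as exc:
    # Raised when the Qwen3 model/tokenizer is not loaded or embedding generation fails.
    print(f"embedding service unavailable: {exc}")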