Spaces:

Ojochegbeng
/

Pansgpt

Running

App Files Community

Ojochegbeng committed on Sep 15

Commit

3d61fba

verified ·

1 Parent(s): 58cc235

Update app.py

Browse files

Files changed (1) hide show

app.py +36 -64

app.py CHANGED Viewed

@@ -49,28 +49,23 @@ def load_model():
         with torch.no_grad():
             test_output = model(**test_input)
             logger.info(f"Model test successful. Output shape: {test_output.last_hidden_state.shape}")
         logger.info("Qwen3-Embedding-0.6B model loaded successfully")
         return True
     except Exception as e:
         logger.error(f"Error loading Qwen3 model: {str(e)}")
-        # Try fallback to a simpler approach
-        try:
-            logger.info("Trying fallback model loading...")
-            from sentence_transformers import SentenceTransformer
-            model = SentenceTransformer('all-MiniLM-L6-v2')
-            tokenizer = None
-            logger.info("Fallback model loaded successfully")
-            return True
-        except Exception as fallback_error:
-            logger.error(f"Fallback model loading also failed: {str(fallback_error)}")
         return False
 def generate_embeddings(texts: Union[str, List[str]]) -> Union[List[float], List[List[float]]]:
     """Generate embeddings for input text(s) using Qwen3-Embedding-0.6B model"""
     global model, tokenizer
     try:
         # Ensure texts is a list
         if isinstance(texts, str):
@@ -86,9 +81,7 @@ def generate_embeddings(texts: Union[str, List[str]]) -> Union[List[float], List
         for text in texts:
             try:
-                # Method 1: Try using the Qwen3 embedding model directly
-                if model and tokenizer and hasattr(model, 'forward'):
-                    # This is the Qwen3 embedding model
                     inputs = tokenizer(
                         text,
                         return_tensors="pt",
@@ -99,60 +92,36 @@ def generate_embeddings(texts: Union[str, List[str]]) -> Union[List[float], List
                     with torch.no_grad():
                         outputs = model(**inputs)
-                        # For Qwen3 embedding models, use the last_hidden_state with mean pooling
-                        if hasattr(outputs, 'last_hidden_state'):
-                            # Mean pooling over the sequence length dimension
-                            attention_mask = inputs.get('attention_mask', None)
-                            if attention_mask is not None:
-                                # Apply attention mask for proper mean pooling
-                                token_embeddings = outputs.last_hidden_state
-                                input_mask_expanded = attention_mask.unsqueeze(-1).expand(token_embeddings.size()).float()
-                                sum_embeddings = torch.sum(token_embeddings * input_mask_expanded, 1)
-                                sum_mask = torch.clamp(input_mask_expanded.sum(1), min=1e-9)
-                                embedding = (sum_embeddings / sum_mask).squeeze().cpu().numpy()
-                            else:
-                                # Simple mean pooling without attention mask
-                                embedding = outputs.last_hidden_state.mean(dim=1).squeeze().cpu().numpy()
                         else:
-                            # Fallback to pooled output if available
-                            embedding = outputs.pooler_output.squeeze().cpu().numpy()
                         embeddings.append(embedding.tolist())
-                elif model and hasattr(model, 'encode'):
-                    # Method 2: Using sentence transformer fallback
-                    embedding = model.encode(text)
-                    embeddings.append(embedding.tolist())
-                else:
-                    raise Exception("No model available")
             except Exception as e:
-                logger.warning(f"Error generating embedding for text: {str(e)}")
-                # Return zero vector as last resort - use correct dimension based on model type
-                if hasattr(model, 'config') and hasattr(model.config, 'hidden_size'):
-                    # Qwen3 model dimension
-                    embeddings.append([0.0] * model.config.hidden_size)
-                else:
-                    # Fallback model dimension (384 for all-MiniLM-L6-v2)
-                    embeddings.append([0.0] * 384)
         return embeddings[0] if single_text else embeddings
     except Exception as e:
         logger.error(f"Error in generate_embeddings: {str(e)}")
-        # Return zero vectors as fallback - use correct dimension
-        if hasattr(model, 'config') and hasattr(model.config, 'hidden_size'):
-            # Qwen3 model dimension
-            fallback_dim = model.config.hidden_size
-        else:
-            # Fallback model dimension (384 for all-MiniLM-L6-v2)
-            fallback_dim = 384
-        if single_text:
-            return [0.0] * fallback_dim
-        else:
-            return [[0.0] * fallback_dim] * len(texts)
 def compute_similarity(embedding1: List[float], embedding2: List[float]) -> float:
     """Compute cosine similarity between two embeddings"""
@@ -230,23 +199,26 @@ def similarity_interface(embedding1: str, embedding2: str) -> float:
 def health_check():
     """Health check endpoint"""
     model_info = {
-        "status": "healthy" if model is not None else "unhealthy",
-        "model_loaded": model is not None,
         "model_name": MODEL_NAME,
         "device": DEVICE,
         "max_length": MAX_LENGTH
     }
-    if model is not None:
         if hasattr(model, 'config'):
             model_info["model_type"] = "Qwen3-Embedding"
             model_info["embedding_dimension"] = getattr(model.config, 'hidden_size', 1024)
-        elif hasattr(model, 'encode'):
-            model_info["model_type"] = "SentenceTransformer-Fallback"
-            model_info["embedding_dimension"] = 384
         else:
             model_info["model_type"] = "Unknown"
             model_info["embedding_dimension"] = "Unknown"
     return model_info

         with torch.no_grad():
             test_output = model(**test_input)
             logger.info(f"Model test successful. Output shape: {test_output.last_hidden_state.shape}")
+            logger.info(f"Model config hidden size: {model.config.hidden_size}")
         logger.info("Qwen3-Embedding-0.6B model loaded successfully")
         return True
     except Exception as e:
         logger.error(f"Error loading Qwen3 model: {str(e)}")
+        logger.error("No fallback available - Qwen3 model is required")
         return False
 def generate_embeddings(texts: Union[str, List[str]]) -> Union[List[float], List[List[float]]]:
     """Generate embeddings for input text(s) using Qwen3-Embedding-0.6B model"""
     global model, tokenizer
+    if not model or not tokenizer:
+        raise Exception("Qwen3 model not loaded. Please ensure the model is properly loaded.")
     try:
         # Ensure texts is a list
         if isinstance(texts, str):
         for text in texts:
             try:
+                # Use the Qwen3 embedding model directly
                     inputs = tokenizer(
                         text,
                         return_tensors="pt",
                     with torch.no_grad():
                         outputs = model(**inputs)
+                    # For Qwen3 embedding models, use the last_hidden_state with mean pooling
+                    if hasattr(outputs, 'last_hidden_state'):
+                        # Mean pooling over the sequence length dimension
+                        attention_mask = inputs.get('attention_mask', None)
+                        if attention_mask is not None:
+                            # Apply attention mask for proper mean pooling
+                            token_embeddings = outputs.last_hidden_state
+                            input_mask_expanded = attention_mask.unsqueeze(-1).expand(token_embeddings.size()).float()
+                            sum_embeddings = torch.sum(token_embeddings * input_mask_expanded, 1)
+                            sum_mask = torch.clamp(input_mask_expanded.sum(1), min=1e-9)
+                            embedding = (sum_embeddings / sum_mask).squeeze().cpu().numpy()
                         else:
+                            # Simple mean pooling without attention mask
+                        embedding = outputs.last_hidden_state.mean(dim=1).squeeze().cpu().numpy()
+                    else:
+                        # Fallback to pooled output if available
+                        embedding = outputs.pooler_output.squeeze().cpu().numpy()
                         embeddings.append(embedding.tolist())
             except Exception as e:
+                logger.error(f"Error generating embedding for text: {str(e)}")
+                raise Exception(f"Failed to generate embedding: {str(e)}")
         return embeddings[0] if single_text else embeddings
     except Exception as e:
         logger.error(f"Error in generate_embeddings: {str(e)}")
+        raise Exception(f"Embedding generation failed: {str(e)}")
 def compute_similarity(embedding1: List[float], embedding2: List[float]) -> float:
     """Compute cosine similarity between two embeddings"""
 def health_check():
     """Health check endpoint"""
     model_info = {
+        "status": "healthy" if model is not None and tokenizer is not None else "unhealthy",
+        "model_loaded": model is not None and tokenizer is not None,
         "model_name": MODEL_NAME,
         "device": DEVICE,
         "max_length": MAX_LENGTH
     }
+    if model is not None and tokenizer is not None:
         if hasattr(model, 'config'):
             model_info["model_type"] = "Qwen3-Embedding"
             model_info["embedding_dimension"] = getattr(model.config, 'hidden_size', 1024)
+            model_info["tokenizer_loaded"] = True
         else:
             model_info["model_type"] = "Unknown"
             model_info["embedding_dimension"] = "Unknown"
+            model_info["tokenizer_loaded"] = False
+    else:
+        model_info["model_type"] = "Not Loaded"
+        model_info["embedding_dimension"] = "N/A"
+        model_info["tokenizer_loaded"] = tokenizer is not None
     return model_info