Spaces:

Ojochegbeng
/

Pansgpt

Running

App Files Community

Ojochegbeng committed on Sep 15

Commit

9e9c055

verified ·

1 Parent(s): fc9c99f

Update app.py

Browse files

Files changed (1) hide show

app.py +54 -11

app.py CHANGED Viewed

@@ -28,7 +28,7 @@ def load_model():
     global model, tokenizer
     try:
-        logger.info(f"Loading Qwen3 embedding model on device: {DEVICE}")
         # Load tokenizer and model for Qwen3 embedding
         tokenizer = AutoTokenizer.from_pretrained(MODEL_NAME, trust_remote_code=True)
@@ -44,7 +44,13 @@ def load_model():
         model.eval()
-        logger.info("Qwen3 embedding model loaded successfully")
         return True
     except Exception as e:
@@ -62,7 +68,7 @@ def load_model():
             return False
 def generate_embeddings(texts: Union[str, List[str]]) -> Union[List[float], List[List[float]]]:
-    """Generate embeddings for input text(s) using Qwen3 or fallback model"""
     global model, tokenizer
     try:
@@ -80,8 +86,9 @@ def generate_embeddings(texts: Union[str, List[str]]) -> Union[List[float], List
         for text in texts:
             try:
-                # Method 1: Try using the Qwen model directly
-                if model and tokenizer:
                     inputs = tokenizer(
                         text,
                         return_tensors="pt",
@@ -92,8 +99,25 @@ def generate_embeddings(texts: Union[str, List[str]]) -> Union[List[float], List
                     with torch.no_grad():
                         outputs = model(**inputs)
-                        # Use mean pooling of last hidden state
-                        embedding = outputs.last_hidden_state.mean(dim=1).squeeze().cpu().numpy()
                         embeddings.append(embedding.tolist())
                 elif model and hasattr(model, 'encode'):
@@ -106,7 +130,7 @@ def generate_embeddings(texts: Union[str, List[str]]) -> Union[List[float], List
             except Exception as e:
                 logger.warning(f"Error generating embedding for text: {str(e)}")
                 # Return zero vector as last resort
-                embeddings.append([0.0] * 384)  # Standard dimension for fallback
         return embeddings[0] if single_text else embeddings
@@ -114,9 +138,9 @@ def generate_embeddings(texts: Union[str, List[str]]) -> Union[List[float], List
         logger.error(f"Error in generate_embeddings: {str(e)}")
         # Return zero vectors as fallback
         if single_text:
-            return [0.0] * 384
         else:
-            return [[0.0] * 384] * len(texts)
 def compute_similarity(embedding1: List[float], embedding2: List[float]) -> float:
     """Compute cosine similarity between two embeddings"""
@@ -193,7 +217,26 @@ def similarity_interface(embedding1: str, embedding2: str) -> float:
 def health_check():
     """Health check endpoint"""
-    return {"status": "healthy", "model_loaded": model is not None}
 # Create FastAPI application
 app = FastAPI(

     global model, tokenizer
     try:
+        logger.info(f"Loading Qwen3-Embedding-0.6B model on device: {DEVICE}")
         # Load tokenizer and model for Qwen3 embedding
         tokenizer = AutoTokenizer.from_pretrained(MODEL_NAME, trust_remote_code=True)
         model.eval()
+        # Test the model with a simple input
+        test_input = tokenizer("test", return_tensors="pt", padding=True, truncation=True, max_length=MAX_LENGTH).to(DEVICE)
+        with torch.no_grad():
+            test_output = model(**test_input)
+            logger.info(f"Model test successful. Output shape: {test_output.last_hidden_state.shape}")
+        logger.info("Qwen3-Embedding-0.6B model loaded successfully")
         return True
     except Exception as e:
             return False
 def generate_embeddings(texts: Union[str, List[str]]) -> Union[List[float], List[List[float]]]:
+    """Generate embeddings for input text(s) using Qwen3-Embedding-0.6B model"""
     global model, tokenizer
     try:
         for text in texts:
             try:
+                # Method 1: Try using the Qwen3 embedding model directly
+                if model and tokenizer and hasattr(model, 'forward'):
+                    # This is the Qwen3 embedding model
                     inputs = tokenizer(
                         text,
                         return_tensors="pt",
                     with torch.no_grad():
                         outputs = model(**inputs)
+                        # For Qwen3 embedding models, use the last_hidden_state with mean pooling
+                        if hasattr(outputs, 'last_hidden_state'):
+                            # Mean pooling over the sequence length dimension
+                            attention_mask = inputs.get('attention_mask', None)
+                            if attention_mask is not None:
+                                # Apply attention mask for proper mean pooling
+                                token_embeddings = outputs.last_hidden_state
+                                input_mask_expanded = attention_mask.unsqueeze(-1).expand(token_embeddings.size()).float()
+                                sum_embeddings = torch.sum(token_embeddings * input_mask_expanded, 1)
+                                sum_mask = torch.clamp(input_mask_expanded.sum(1), min=1e-9)
+                                embedding = (sum_embeddings / sum_mask).squeeze().cpu().numpy()
+                            else:
+                                # Simple mean pooling without attention mask
+                                embedding = outputs.last_hidden_state.mean(dim=1).squeeze().cpu().numpy()
+                        else:
+                            # Fallback to pooled output if available
+                            embedding = outputs.pooler_output.squeeze().cpu().numpy()
                         embeddings.append(embedding.tolist())
                 elif model and hasattr(model, 'encode'):
             except Exception as e:
                 logger.warning(f"Error generating embedding for text: {str(e)}")
                 # Return zero vector as last resort
+                embeddings.append([0.0] * 1024)  # Qwen3-Embedding-0.6B has 1024 dimensions
         return embeddings[0] if single_text else embeddings
         logger.error(f"Error in generate_embeddings: {str(e)}")
         # Return zero vectors as fallback
         if single_text:
+            return [0.0] * 1024
         else:
+            return [[0.0] * 1024] * len(texts)
 def compute_similarity(embedding1: List[float], embedding2: List[float]) -> float:
     """Compute cosine similarity between two embeddings"""
 def health_check():
     """Health check endpoint"""
+    model_info = {
+        "status": "healthy" if model is not None else "unhealthy",
+        "model_loaded": model is not None,
+        "model_name": MODEL_NAME,
+        "device": DEVICE,
+        "max_length": MAX_LENGTH
+    }
+    if model is not None:
+        if hasattr(model, 'config'):
+            model_info["model_type"] = "Qwen3-Embedding"
+            model_info["embedding_dimension"] = getattr(model.config, 'hidden_size', 1024)
+        elif hasattr(model, 'encode'):
+            model_info["model_type"] = "SentenceTransformer-Fallback"
+            model_info["embedding_dimension"] = 384
+        else:
+            model_info["model_type"] = "Unknown"
+            model_info["embedding_dimension"] = "Unknown"
+    return model_info
 # Create FastAPI application
 app = FastAPI(