Spaces:
Running
Running
Update app.py
Browse files
app.py
CHANGED
|
@@ -2,7 +2,7 @@ from fastapi import FastAPI, HTTPException
|
|
| 2 |
from fastapi.middleware.cors import CORSMiddleware
|
| 3 |
import torch
|
| 4 |
import numpy as np
|
| 5 |
-
from transformers import AutoTokenizer, AutoModel
|
| 6 |
from typing import List, Union
|
| 7 |
import json
|
| 8 |
import logging
|
|
@@ -31,7 +31,18 @@ def load_model():
|
|
| 31 |
logger.info(f"Loading Qwen3-Embedding-0.6B model on device: {DEVICE}")
|
| 32 |
|
| 33 |
# Load tokenizer and model for Qwen3 embedding
|
| 34 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 35 |
model = AutoModel.from_pretrained(
|
| 36 |
MODEL_NAME,
|
| 37 |
trust_remote_code=True,
|
|
@@ -50,6 +61,7 @@ def load_model():
|
|
| 50 |
test_output = model(**test_input)
|
| 51 |
logger.info(f"Model test successful. Output shape: {test_output.last_hidden_state.shape}")
|
| 52 |
logger.info(f"Model config hidden size: {model.config.hidden_size}")
|
|
|
|
| 53 |
|
| 54 |
logger.info("Qwen3-Embedding-0.6B model loaded successfully")
|
| 55 |
return True
|
|
|
|
| 2 |
from fastapi.middleware.cors import CORSMiddleware
|
| 3 |
import torch
|
| 4 |
import numpy as np
|
| 5 |
+
from transformers import AutoTokenizer, AutoModel, AutoConfig
|
| 6 |
from typing import List, Union
|
| 7 |
import json
|
| 8 |
import logging
|
|
|
|
| 31 |
logger.info(f"Loading Qwen3-Embedding-0.6B model on device: {DEVICE}")
|
| 32 |
|
| 33 |
# Load tokenizer and model for Qwen3 embedding
|
| 34 |
+
# First, try to load the config to understand the model structure
|
| 35 |
+
config = AutoConfig.from_pretrained(MODEL_NAME, trust_remote_code=True)
|
| 36 |
+
logger.info(f"Model config loaded: {config.model_type}")
|
| 37 |
+
|
| 38 |
+
# Load tokenizer - try different approaches
|
| 39 |
+
try:
|
| 40 |
+
tokenizer = AutoTokenizer.from_pretrained(MODEL_NAME, trust_remote_code=True)
|
| 41 |
+
except Exception as tokenizer_error:
|
| 42 |
+
logger.warning(f"Failed to load tokenizer with trust_remote_code=True: {tokenizer_error}")
|
| 43 |
+
tokenizer = AutoTokenizer.from_pretrained(MODEL_NAME, trust_remote_code=False)
|
| 44 |
+
|
| 45 |
+
# Load model
|
| 46 |
model = AutoModel.from_pretrained(
|
| 47 |
MODEL_NAME,
|
| 48 |
trust_remote_code=True,
|
|
|
|
| 61 |
test_output = model(**test_input)
|
| 62 |
logger.info(f"Model test successful. Output shape: {test_output.last_hidden_state.shape}")
|
| 63 |
logger.info(f"Model config hidden size: {model.config.hidden_size}")
|
| 64 |
+
logger.info(f"Tokenizer vocab size: {tokenizer.vocab_size}")
|
| 65 |
|
| 66 |
logger.info("Qwen3-Embedding-0.6B model loaded successfully")
|
| 67 |
return True
|