Spaces:

firefighter
/

TransDis-CreativityAutoAssessment

Running

App Files Files Community

Qifan Zhang committed 10 days ago

Commit

b91a6bd

1 Parent(s): 051bcc8

Add Qwen3 embedding support with last-token pooling

Browse files

Files changed (14) hide show

.codex/environments/environment.toml +0 -11
.gitignore +32 -4
AGENTS.md +3 -2
README.md +2 -2
app.py +67 -15
data/description.txt +3 -3
requirements.txt +1 -1
tests/test_app.py +31 -0
tests/test_models.py +94 -0
tests/test_pipeline.py +63 -0
utils/__pycache__/models.cpython-311.pyc +0 -0
utils/__pycache__/pipeline.cpython-311.pyc +0 -0
utils/models.py +51 -1
utils/pipeline.py +3 -3

.codex/environments/environment.toml DELETED Viewed

@@ -1,11 +0,0 @@
-# THIS IS AUTOGENERATED. DO NOT EDIT MANUALLY
-version = 1
-name = "TransDis-CreativityAutoAssessment"
-[setup]
-script = ""
-[[actions]]
-name = "运行"
-icon = "run"
-command = "GRADIO_SERVER_PORT=7860 /Users/eric/.local/bin/uv run --python 3.11 --with gradio==6.14.0 --with-requirements requirements.txt python app.py"

.gitignore CHANGED Viewed

@@ -1,6 +1,34 @@
-.idea
-flagged
-data/example
-data/tmp
 output.csv

+# Editor / OS noise
+.DS_Store
+.idea/
+.vscode/
+# Python caches
+__pycache__/
+*.py[cod]
+*$py.class
+.pytest_cache/
+.ruff_cache/
+.mypy_cache/
+.coverage
+htmlcov/
+# Local environments
+.env
+.env.*
+!.env.example
+.venv/
+venv/
+env/
+# Local app/runtime output
+flagged/
+data/example/
+data/tmp/
 output.csv
+outputs/
+tmp/
+temp/
+# Local Codex app metadata
+.codex/

AGENTS.md CHANGED Viewed

@@ -10,7 +10,7 @@
 - `app.py`：Gradio UI、输入解析、行数限制、任务分发、输出 CSV 生成。
 - `utils/pipeline.py`：`Originality` 和 `Flexibility` 的评分流程。
-- `utils/models.py`：模型列表、tokenizer/model 加载、pooling 行为。
 - `data/description.txt`：Space 页面中展示的长篇双语说明。
 - `.pre-commit-config.yaml`：本地 Ruff pre-commit hook。
@@ -34,7 +34,8 @@
 ## 模型与评分规则
 - 除非用户明确要求模型或评分变化，不要修改模型下拉列表、默认模型、pooling 选项或评分公式。
-- `ModelWithPooling` 使用 Hugging Face `AutoTokenizer` 和 `AutoModel`；UI 暴露的 pooling 模式是 `mean` 和 `cls`。
 - 避免在 app 启动时提前加载模型。模型下载和加载应只在处理请求时发生。
 - 做轻量本地测试时，优先 monkeypatch `pipeline.p0_originality` / `pipeline.p1_flexibility`，不要为了普通 smoke test 下载大模型；只有真实推理验证才下载模型。

 - `app.py`：Gradio UI、输入解析、行数限制、任务分发、输出 CSV 生成。
 - `utils/pipeline.py`：`Originality` 和 `Flexibility` 的评分流程。
+- `utils/models.py`：模型列表、embedding adapter registry、tokenizer/model 加载、pooling 行为。
 - `data/description.txt`：Space 页面中展示的长篇双语说明。
 - `.pre-commit-config.yaml`：本地 Ruff pre-commit hook。
 ## 模型与评分规则
 - 除非用户明确要求模型或评分变化，不要修改模型下拉列表、默认模型、pooling 选项或评分公式。
+- `utils.models.get_embedding_model()` 是评分流程的统一模型入口。legacy 模型走 `ModelWithPooling`，使用 Hugging Face `AutoTokenizer` 和 `AutoModel`；`Qwen/Qwen3-Embedding-0.6B` 走专用 adapter，自动使用官方推荐的 last-token pooling 和向量归一化。
+- UI 使用 Gradio `Blocks` 实现，但视觉结构应保持原来的左右两列：左侧输入、右侧输出。legacy 模型的 pooling 选项是 `mean` 和 `cls`；选择 `Qwen/Qwen3-Embedding-0.6B` 时，UI 应切换并锁定为 `last-token`，后端也应通过 `effective_pooling()` 兜底强制。
 - 避免在 app 启动时提前加载模型。模型下载和加载应只在处理请求时发生。
 - 做轻量本地测试时，优先 monkeypatch `pipeline.p0_originality` / `pipeline.p1_flexibility`，不要为了普通 smoke test 下载大模型；只有真实推理验证才下载模型。

README.md CHANGED Viewed

@@ -45,13 +45,13 @@ id,prompt,response
 ## 模型选择
-应用会在下拉菜单中提供多语言、英文和中文 Transformer 检查点。默认模型是：
 ```text
 sentence-transformers/paraphrase-multilingual-mpnet-base-v2
 ```
-可选 pooling 方式是 `mean` 和 `cls`。如果使用 `bert-base-chinese`，建议选择 `mean` pooling。
 ## 本地开发

 ## 模型选择
+应用会在下拉菜单中提供多语言、英文和中文 Transformer 检查点，包括多语言 embedding 模型 `Qwen/Qwen3-Embedding-0.6B`。默认模型是：
 ```text
 sentence-transformers/paraphrase-multilingual-mpnet-base-v2
 ```
+可选 pooling 方式主要用于 legacy Transformer 检查点，包括 `mean` 和 `cls`。如果使用 `bert-base-chinese`，建议选择 `mean` pooling。选择 `Qwen/Qwen3-Embedding-0.6B` 时，界面会自动切换并锁定为 `last-token`，应用会使用该模型推荐的 last-token pooling 和向量归一化，即使 API 请求传入其他 pooling 值也会被覆盖。
 ## 本地开发

app.py CHANGED Viewed

@@ -2,14 +2,19 @@ import os
 import tempfile
 import traceback
 from io import StringIO
-from typing import Optional
 import gradio as gr
 import pandas as pd
 from loguru import logger
 from utils import pipeline
-from utils.models import list_models
 def resolve_file_path(file) -> str:
@@ -33,14 +38,35 @@ def read_data(filepath: str) -> Optional[pd.DataFrame]:
     return df
 def process(
         task_name: str,
         model_name: str,
         pooling: str,
         text: str,
         file=None,
-) -> (None, pd.DataFrame, str):
     try:
         logger.info(f'Processing {task_name} with {model_name} and {pooling}')
         # load file
         if file:
@@ -56,6 +82,11 @@ def process(
         if len(df) > 10000:
             raise Exception('Data exceeds 10,000 rows')
         # process
         if task_name == 'Originality':
             df = pipeline.p0_originality(df, model_name, pooling)
@@ -68,7 +99,7 @@ def process(
         fd, path = tempfile.mkstemp(prefix='transdis_', suffix='.csv')
         os.close(fd)
         df.to_csv(path, index=False, encoding='utf-8-sig')
-        return None, df.iloc[:10], path
     except Exception:
         error = traceback.format_exc()
@@ -80,7 +111,7 @@ def process(
             'text': text,
             'file': file,
         })
-        return f'Something wrong\n\n{error}', None, None
 # input
@@ -96,8 +127,8 @@ model_name_dropdown = gr.components.Dropdown(
 )
 pooling_dropdown = gr.components.Dropdown(
     label='Pooling',
-    value='mean',
-    choices=['mean', 'cls']
 )
 text_input = gr.components.Textbox(
     value=open('data/example_xlm.csv', 'r').read(),
@@ -110,13 +141,34 @@ text_output = gr.components.Textbox(label='Output')
 dataframe_output = gr.components.Dataframe(label='DataFrame')
 file_output = gr.components.File(label='Output File', file_types=['.csv', '.xlsx'])
-app = gr.Interface(
-    fn=process,
-    inputs=[task_name_dropdown, model_name_dropdown, pooling_dropdown, text_input, file_input],
-    outputs=[text_output, dataframe_output, file_output],
-    description=open('data/description.txt', 'r').read(),
-    title='TransDis-CreativityAutoAssessment',
-    concurrency_limit=1,
-)
 if __name__ == '__main__':
     app.launch(max_threads=1)

 import tempfile
 import traceback
 from io import StringIO
+from typing import Generator, Optional
 import gradio as gr
 import pandas as pd
 from loguru import logger
 from utils import pipeline
+from utils.models import QWEN3_EMBEDDING_MODEL, get_embedding_model, list_models
+LEGACY_POOLING_CHOICES = ['mean', 'cls']
+QWEN3_POOLING_CHOICES = ['last-token']
+DEFAULT_POOLING = 'mean'
+QWEN3_POOLING = 'last-token'
 def resolve_file_path(file) -> str:
     return df
def effective_pooling(model_name: str, pooling: str) -> str:
    """Return the pooling mode that is actually applied for ``model_name``.

    The Qwen3 embedding model always resolves to ``QWEN3_POOLING``; for any
    other model the caller-supplied ``pooling`` is returned unchanged.
    """
    return QWEN3_POOLING if model_name == QWEN3_EMBEDDING_MODEL else pooling
def update_pooling_for_model(model_name: str):
    """Build the Gradio update applied to the pooling dropdown for ``model_name``.

    Selecting the Qwen3 embedding model restricts the dropdown to the Qwen3
    choices, selects ``QWEN3_POOLING`` and makes it non-interactive. Any other
    model restores the legacy choices with ``DEFAULT_POOLING`` selected and
    the dropdown interactive again.
    """
    is_qwen3 = model_name == QWEN3_EMBEDDING_MODEL
    if is_qwen3:
        choices, value = QWEN3_POOLING_CHOICES, QWEN3_POOLING
    else:
        choices, value = LEGACY_POOLING_CHOICES, DEFAULT_POOLING
    return gr.update(choices=choices, value=value, interactive=not is_qwen3)
 def process(
         task_name: str,
         model_name: str,
         pooling: str,
         text: str,
         file=None,
+) -> Generator[tuple[str, Optional[pd.DataFrame], Optional[str]], None, None]:
     try:
+        pooling = effective_pooling(model_name, pooling)
         logger.info(f'Processing {task_name} with {model_name} and {pooling}')
         # load file
         if file:
         if len(df) > 10000:
             raise Exception('Data exceeds 10,000 rows')
+        yield f'模型加载中：{model_name}', None, None
+        get_embedding_model(model_name)
+        yield '计算中...', None, None
         # process
         if task_name == 'Originality':
             df = pipeline.p0_originality(df, model_name, pooling)
         fd, path = tempfile.mkstemp(prefix='transdis_', suffix='.csv')
         os.close(fd)
         df.to_csv(path, index=False, encoding='utf-8-sig')
+        yield '完成', df.iloc[:10], path
     except Exception:
         error = traceback.format_exc()
             'text': text,
             'file': file,
         })
+        yield f'Something wrong\n\n{error}', None, None
 # input
 )
 pooling_dropdown = gr.components.Dropdown(
     label='Pooling',
+    value=DEFAULT_POOLING,
+    choices=LEGACY_POOLING_CHOICES
 )
 text_input = gr.components.Textbox(
     value=open('data/example_xlm.csv', 'r').read(),
 dataframe_output = gr.components.Dataframe(label='DataFrame')
 file_output = gr.components.File(label='Output File', file_types=['.csv', '.xlsx'])
# Read the page description before building the UI. The file holds bilingual
# (Chinese/English) text, so the encoding is stated explicitly instead of
# relying on the platform default, and the handle is closed deterministically.
with open('data/description.txt', 'r', encoding='utf-8') as _description_file:
    _description_text = _description_file.read()

with gr.Blocks(title='TransDis-CreativityAutoAssessment') as app:
    gr.Markdown('# TransDis-CreativityAutoAssessment')
    gr.Markdown(_description_text)
    # Two-column layout: inputs on the left, outputs on the right.
    with gr.Row():
        with gr.Column():
            task_name_dropdown.render()
            model_name_dropdown.render()
            pooling_dropdown.render()
            text_input.render()
            file_input.render()
            submit_button = gr.Button('Submit', variant='primary')
        with gr.Column():
            text_output.render()
            dataframe_output.render()
            file_output.render()
    # Changing the model refreshes the pooling dropdown's choices and value.
    model_name_dropdown.change(
        fn=update_pooling_for_model,
        inputs=model_name_dropdown,
        outputs=pooling_dropdown,
    )
    # Submitting runs `process`; each tuple it yields fills the three outputs.
    submit_button.click(
        fn=process,
        inputs=[task_name_dropdown, model_name_dropdown, pooling_dropdown, text_input, file_input],
        outputs=[text_output, dataframe_output, file_output],
        api_name='predict',
        concurrency_limit=1,
    )
if __name__ == '__main__':
    app.launch(max_threads=1)

data/description.txt CHANGED Viewed

@@ -1,5 +1,5 @@
-TransDis系统，是一个基于Transformer语言模型的语义距离评分系统，用于自动评估中文（或其他语言）的多用途任务（AUT)中的独创性和灵活性（论文见，https://link.springer.com/article/10.3758/s13428-023-02313-z ）。 输入被试（id)+提示词+回答的数据，每行1个用途，用逗号隔开。您可以通过文本框直接输入数据，也可以上传用逗号隔开的CSV格式文件或xlsx文件作为输入，CSV输入优先级高于文本框输入。 您可以选择用于评分的模型，请注意sentence-transformers_paraphrase-multilingual-mpnet-base-v2和sentence-transformers_paraphrase-multilingual-MiniLM-L12-v2可用于多语言，其他模型仅适用于英文或中文。 我们提供Pooling方法的选择，对于bert-base-chinese建议使用mean pooling。 如发生错误，请试着简化你的数据——用更少的行试试。如果不行，则可能是输入格式错误，请尝试重新保存为逗号分隔的CSV，然后再上传CSV文件。 如运行较慢，可以复制此空间至您的帐号（我们建议这种方式），并选择升级版的硬件以提升处理速度。 如需更多帮助或报告其他bug，请联系ydd409@163.com。
-TranDis, a semantic distance scoring system based on transformer-based language models, can be a useful tool to automatically assess originality and flexibility for AUT in Chinese or other languages (see the paper at https://link.springer.com/article/10.3758/s13428-023-02313-z). Enter your participant ID + prompt + response data, one per line, with a COMMA between each variable. You can either input data directly into the text box or upload a comma-separated CSV file or a XLSX file as input. Please note that if both methods are used, the CSV input will take precedence over the text box input. You can choose the model to use for scoring. Please note that sentence-transformers_paraphrase-multilingual-mpnet-base-v2 and sentence-transformers_paraphrase-multilingual-MiniLM-L12-v2 are applicable to multiple languages; cyclone_simcse-chinese-roberta-wwm-ext is only applicable to Chinese; sentence-transformers/all-mpnet-base-v2 and sentence-transformers/all-MiniLM-L12-v2 are only applicable to English. If an error occurs, try simplifying your data - does it work with fewer rows? If not, the input format may be incorrect. If the process is sluggish, you have the option to duplicate this space to your account (we recommend this approach) and choose an enhanced hardware configuration for improved processing speed. For more assistance or to report potential issues with our system, please contact ydd409@163.com.
-Reference: Yang, T., Zhang, Q., Sun, Z., & Hou, Y. (2023). Automatic Assessment of Divergent Thinking in Chinese Language with TransDis: A Transformer-Based Language Model Approach. Behavior Research Methods. https://doi.org/10.3758/s13428-023-02313-z

+TransDis系统，是一个基于Transformer语言模型的语义距离评分系统，用于自动评估中文（或其他语言）的多用途任务（AUT)中的独创性和灵活性（论文见，https://link.springer.com/article/10.3758/s13428-023-02313-z ）。 输入被试（id)+提示词+回答的数据，每行1个用途，用逗号隔开。您可以通过文本框直接输入数据，也可以上传用逗号隔开的CSV格式文件或xlsx文件作为输入，CSV输入优先级高于文本框输入。 您可以选择用于评分的模型，请注意sentence-transformers_paraphrase-multilingual-mpnet-base-v2、sentence-transformers_paraphrase-multilingual-MiniLM-L12-v2和Qwen/Qwen3-Embedding-0.6B可用于多语言，其他模型仅适用于英文或中文。 我们提供Pooling方法的选择，对于bert-base-chinese建议使用mean pooling；选择Qwen/Qwen3-Embedding-0.6B时，界面会自动切换并锁定为last-token，系统会使用该模型推荐的pooling和归一化方式。 如发生错误，请试着简化你的数据——用更少的行试试。如果不行，则可能是输入格式错误，请尝试重新保存为逗号分隔的CSV，然后再上传CSV文件。 如运行较慢，可以复制此空间至您的帐号（我们建议这种方式），并选择升级版的硬件以提升处理速度。 如需更多帮助或报告其他bug，请联系ydd409@163.com。
+TransDis, a semantic distance scoring system based on transformer-based language models, can be a useful tool to automatically assess originality and flexibility for AUT in Chinese or other languages (see the paper at https://link.springer.com/article/10.3758/s13428-023-02313-z). Enter your participant ID + prompt + response data, one per line, with a COMMA between each variable. You can either input data directly into the text box or upload a comma-separated CSV file or an XLSX file as input. Please note that if both methods are used, the CSV input will take precedence over the text box input. You can choose the model to use for scoring. Please note that sentence-transformers_paraphrase-multilingual-mpnet-base-v2, sentence-transformers_paraphrase-multilingual-MiniLM-L12-v2, and Qwen/Qwen3-Embedding-0.6B are applicable to multiple languages; cyclone_simcse-chinese-roberta-wwm-ext is only applicable to Chinese; sentence-transformers/all-mpnet-base-v2 and sentence-transformers/all-MiniLM-L12-v2 are only applicable to English. The pooling selector applies to legacy models; when Qwen/Qwen3-Embedding-0.6B is selected, the UI switches and locks the selector to last-token, and the backend uses its recommended pooling and vector normalization. If an error occurs, try simplifying your data - does it work with fewer rows? If not, the input format may be incorrect. If the process is sluggish, you have the option to duplicate this space to your account (we recommend this approach) and choose an enhanced hardware configuration for improved processing speed. For more assistance or to report potential issues with our system, please contact ydd409@163.com.
+Reference: Yang, T., Zhang, Q., Sun, Z., & Hou, Y. (2023). Automatic Assessment of Divergent Thinking in Chinese Language with TransDis: A Transformer-Based Language Model Approach. Behavior Research Methods. https://doi.org/10.3758/s13428-023-02313-z

requirements.txt CHANGED Viewed

@@ -1,7 +1,7 @@
 numpy<3
 pandas>=2.2,<4
-transformers>=4.45,<6
 sentence-transformers>=3,<6
 torch>=2.4,<3

 numpy<3
 pandas>=2.2,<4
+transformers>=4.51,<6
 sentence-transformers>=3,<6
 torch>=2.4,<3

tests/test_app.py ADDED Viewed

	@@ -0,0 +1,31 @@

+import unittest
+import app
+from utils.models import QWEN3_EMBEDDING_MODEL
class PoolingUiTest(unittest.TestCase):
    """Checks the pooling dropdown defaults and the model-driven pooling overrides."""

    def test_pooling_dropdown_defaults_to_legacy_choices(self):
        dropdown = app.pooling_dropdown
        self.assertEqual(dropdown.value, 'mean')
        # The expected value is (label, value) pairs rather than bare strings.
        self.assertEqual(dropdown.choices, [('mean', 'mean'), ('cls', 'cls')])

    def test_qwen3_pooling_update_forces_last_token(self):
        qwen3_update = app.update_pooling_for_model(QWEN3_EMBEDDING_MODEL)
        self.assertEqual(qwen3_update['choices'], ['last-token'])
        self.assertEqual(qwen3_update['value'], 'last-token')
        self.assertFalse(qwen3_update['interactive'])

    def test_legacy_pooling_update_restores_mean_cls(self):
        legacy_update = app.update_pooling_for_model('bert-base-chinese')
        self.assertEqual(legacy_update['choices'], ['mean', 'cls'])
        self.assertEqual(legacy_update['value'], 'mean')
        self.assertTrue(legacy_update['interactive'])

    def test_qwen3_effective_pooling_ignores_api_pooling_value(self):
        resolved = app.effective_pooling(QWEN3_EMBEDDING_MODEL, 'mean')
        self.assertEqual(resolved, 'last-token')
+if __name__ == '__main__':
+    unittest.main()

tests/test_models.py ADDED Viewed

	@@ -0,0 +1,94 @@

+from types import SimpleNamespace
+import unittest
+from unittest.mock import patch
+import torch
+from utils import models
class FakeTokenizer:
    """Tokenizer stand-in that records its call arguments and returns a fixed encoding."""

    def __init__(self):
        # One dict per call, in call order.
        self.calls = []

    def __call__(self, text, padding, truncation, max_length, return_tensors):
        call_record = dict(
            text=text,
            padding=padding,
            truncation=truncation,
            max_length=max_length,
            return_tensors=return_tensors,
        )
        self.calls.append(call_record)
        # Fixed single-sequence batch: two attended tokens, then one padded slot.
        input_ids = torch.tensor([[101, 102, 0]])
        attention_mask = torch.tensor([[1, 1, 0]])
        return {'input_ids': input_ids, 'attention_mask': attention_mask}
class FakeModel:
    """Model stand-in that tracks ``to``/``eval`` calls and returns fixed hidden states."""

    def __init__(self):
        self.eval_called = False
        self.device = torch.device('cpu')

    def to(self, device):
        # Remember the requested device and return self so calls can be chained.
        self.device = torch.device(device)
        return self

    def eval(self):
        self.eval_called = True
        return self

    def __call__(self, **inputs):
        # One sequence of three 2-d token vectors; the inputs are ignored.
        token_vectors = [[3.0, 0.0], [0.0, 4.0], [5.0, 12.0]]
        states = torch.tensor([token_vectors], device=self.device)
        return SimpleNamespace(last_hidden_state=states)
class Qwen3EmbeddingTest(unittest.TestCase):
    """Checks the Qwen3 model registration and its embedding adapter."""

    def tearDown(self):
        # Drop any adapter cached by the factory so tests stay independent.
        models.get_embedding_model.cache_clear()

    def test_qwen3_model_is_available_without_changing_default(self):
        default_model = models.list_models[0]
        self.assertEqual(
            default_model,
            'sentence-transformers/paraphrase-multilingual-mpnet-base-v2',
        )
        self.assertIn(models.QWEN3_EMBEDDING_MODEL, models.list_models)

    def test_qwen3_uses_official_pooling_shape_and_normalization(self):
        fake_tokenizer, fake_model = FakeTokenizer(), FakeModel()
        models.get_embedding_model.cache_clear()
        tokenizer_patch = patch.object(models.AutoTokenizer, 'from_pretrained', return_value=fake_tokenizer)
        model_patch = patch.object(models.AutoModel, 'from_pretrained', return_value=fake_model)
        with tokenizer_patch as load_tokenizer, model_patch as load_model:
            embedding_model = models.get_embedding_model(models.QWEN3_EMBEDDING_MODEL)
            # 'cls' is passed deliberately: the expected vector is the
            # normalised second (last attended) token, not the first one.
            embedding = embedding_model('hello', pooling='cls')
        load_tokenizer.assert_called_once_with(
            models.QWEN3_EMBEDDING_MODEL,
            padding_side='left',
        )
        load_model.assert_called_once_with(models.QWEN3_EMBEDDING_MODEL)
        self.assertTrue(fake_model.eval_called)
        first_call = fake_tokenizer.calls[0]
        self.assertEqual(first_call['max_length'], 8192)
        self.assertEqual(tuple(embedding.shape), (2,))
        torch.testing.assert_close(embedding.cpu(), torch.tensor([0.0, 1.0]))
        norm = torch.linalg.vector_norm(embedding).item()
        self.assertAlmostEqual(norm, 1.0)
+if __name__ == '__main__':
+    unittest.main()

tests/test_pipeline.py ADDED Viewed

	@@ -0,0 +1,63 @@

+import unittest
+from unittest.mock import patch
+import pandas as pd
+import torch
+from utils import pipeline
class FakeEmbeddingModel:
    """Embedding-model stand-in backed by a ``text -> vector`` mapping."""

    def __init__(self, vectors):
        # (text, pooling) pairs, in call order.
        self.calls = []
        self.vectors = vectors

    def __call__(self, text, pooling='mean'):
        self.calls.append((text, pooling))
        vector = self.vectors[text]
        return torch.tensor(vector)
class PipelineFactoryTest(unittest.TestCase):
    """Checks that both scoring pipelines obtain their model from ``get_embedding_model``."""

    def test_originality_uses_embedding_factory(self):
        vectors = {
            'prompt': [1.0, 0.0],
            'response': [0.0, 1.0],
        }
        fake_model = FakeEmbeddingModel(vectors)
        frame = pd.DataFrame({'prompt': ['prompt'], 'response': ['response']})
        factory_patch = patch.object(pipeline, 'get_embedding_model', return_value=fake_model)
        with factory_patch as factory:
            scored = pipeline.p0_originality(frame, 'fake-model', 'mean')
        factory.assert_called_once_with('fake-model')
        self.assertAlmostEqual(scored.loc[0, 'originality'], 1.0)
        expected_calls = [('prompt', 'mean'), ('response', 'mean')]
        self.assertEqual(fake_model.calls, expected_calls)

    def test_flexibility_uses_embedding_factory(self):
        vectors = {
            'p': [1.0, 0.0],
            'a': [1.0, 0.0],
            'b': [0.0, 1.0],
        }
        fake_model = FakeEmbeddingModel(vectors)
        frame = pd.DataFrame(
            {
                'id': [1, 1, 1],
                'prompt': ['p', 'p', 'p'],
                'response': ['a', 'b', 'a'],
            }
        )
        factory_patch = patch.object(pipeline, 'get_embedding_model', return_value=fake_model)
        with factory_patch as factory:
            scored = pipeline.p1_flexibility(frame, 'fake-model', 'cls')
        factory.assert_called_once_with('fake-model')
        self.assertEqual(len(scored), 1)
        self.assertAlmostEqual(scored.loc[0, 'flexibility'], 2.0)
        expected_calls = [('a', 'cls'), ('b', 'cls'), ('a', 'cls')]
        self.assertEqual(fake_model.calls, expected_calls)
+if __name__ == '__main__':
+    unittest.main()

utils/__pycache__/models.cpython-311.pyc DELETED Viewed

Binary file (5.89 kB)

utils/__pycache__/pipeline.cpython-311.pyc DELETED Viewed

Binary file (3.87 kB)

utils/models.py CHANGED Viewed

@@ -1,13 +1,15 @@
 from functools import lru_cache
 import torch
 from loguru import logger
 from sentence_transformers import SentenceTransformer
 from transformers import AutoTokenizer, AutoModel
 DEVICE = 'cuda' if torch.cuda.is_available() else 'cpu'
-list_models = [
     'sentence-transformers/paraphrase-multilingual-mpnet-base-v2',
     'sentence-transformers/paraphrase-multilingual-MiniLM-L12-v2',
     'sentence-transformers/all-mpnet-base-v2',
@@ -17,6 +19,8 @@ list_models = [
     'IDEA-CCNL/Erlangshen-SimCSE-110M-Chinese',
 ]
 class SBert:
     def __init__(self, path):
@@ -68,6 +72,52 @@ class ModelWithPooling:
         return o
 def test_sbert():
     m = SBert('bert-base-chinese')
     o = m('hello')

 from functools import lru_cache
 import torch
+import torch.nn.functional as F
 from loguru import logger
 from sentence_transformers import SentenceTransformer
 from transformers import AutoTokenizer, AutoModel
 DEVICE = 'cuda' if torch.cuda.is_available() else 'cpu'
+QWEN3_EMBEDDING_MODEL = 'Qwen/Qwen3-Embedding-0.6B'
+LEGACY_MODELS = [
     'sentence-transformers/paraphrase-multilingual-mpnet-base-v2',
     'sentence-transformers/paraphrase-multilingual-MiniLM-L12-v2',
     'sentence-transformers/all-mpnet-base-v2',
     'IDEA-CCNL/Erlangshen-SimCSE-110M-Chinese',
 ]
+list_models = [*LEGACY_MODELS, QWEN3_EMBEDDING_MODEL]
 class SBert:
     def __init__(self, path):
         return o
class Qwen3Embedding:
    """Embedding adapter that applies last-token pooling and L2 normalisation.

    Instances are callable with the same ``(text, pooling)`` signature as the
    other embedding models in this module. ``pooling`` is accepted only for
    that compatibility and never affects the result.

    Embeddings are memoised per instance and keyed on the text alone, so the
    cache neither outlives the instance nor stores one vector per pooling value.
    """

    # Maximum number of distinct texts memoised per instance.
    CACHE_SIZE = 100
    # Token limit passed to the tokenizer; longer inputs are truncated.
    MAX_LENGTH = 8192

    def __init__(self, path):
        """Load the tokenizer and model from ``path`` and switch to eval mode."""
        logger.info(f'Start loading {self.__class__} from {path} ...')
        self.tokenizer = AutoTokenizer.from_pretrained(path, padding_side='left')
        self.model = AutoModel.from_pretrained(path)
        self.model.to(DEVICE)
        self.model.eval()
        logger.info(f'Load {self.__class__} from {path} ...')

    @staticmethod
    def last_token_pool(last_hidden_states: torch.Tensor, attention_mask: torch.Tensor) -> torch.Tensor:
        """Select the hidden state of the last attended token of each sequence.

        Args:
            last_hidden_states: Hidden states indexed as (batch, position, feature).
            attention_mask: Mask indexed as (batch, position); 1 marks attended tokens.

        Returns:
            One hidden-state vector per sequence.
        """
        # If every sequence attends to its final position, the batch is
        # left-padded (or unpadded) and the final position is the last token.
        left_padding = attention_mask[:, -1].sum() == attention_mask.shape[0]
        if left_padding:
            return last_hidden_states[:, -1]
        # Otherwise index each row at its own last attended position.
        sequence_lengths = attention_mask.sum(dim=1) - 1
        batch_size = last_hidden_states.shape[0]
        return last_hidden_states[
            torch.arange(batch_size, device=last_hidden_states.device),
            sequence_lengths,
        ]

    @torch.no_grad()
    def _embed(self, text: str) -> torch.Tensor:
        """Compute the normalised embedding of ``text`` without caching."""
        inputs = self.tokenizer(
            text,
            padding=True,
            truncation=True,
            max_length=self.MAX_LENGTH,
            return_tensors='pt',
        )
        # Move inputs to wherever the model currently lives.
        device = self.model.device
        inputs = {key: value.to(device) for key, value in inputs.items()}
        outputs = self.model(**inputs)
        embeddings = self.last_token_pool(outputs.last_hidden_state, inputs['attention_mask'])
        embeddings = F.normalize(embeddings, p=2, dim=1)
        return embeddings.squeeze(0)

    def __call__(self, text: str, pooling='mean'):
        """Return the L2-normalised embedding of ``text``.

        Args:
            text: Input string to embed.
            pooling: Ignored; kept so callers can pass it as they do for other models.

        Returns:
            The embedding with the batch dimension squeezed out. Repeated calls
            with the same text return the same cached tensor object.
        """
        # Build the cache lazily on the instance rather than decorating the
        # method: a method-level lru_cache keys on `self` and keeps every
        # instance (and its weights) alive for the lifetime of the class.
        cached_embed = self.__dict__.get('_cached_embed')
        if cached_embed is None:
            cached_embed = lru_cache(maxsize=self.CACHE_SIZE)(self._embed)
            self._cached_embed = cached_embed
        return cached_embed(text)
@lru_cache(maxsize=8)
def get_embedding_model(model_name: str):
    """Return the embedding adapter for ``model_name``.

    The Qwen3 embedding model gets a ``Qwen3Embedding`` adapter; every other
    name gets a ``ModelWithPooling``. Results are memoised per model name
    (up to 8 entries), so repeated calls reuse the same loaded instance.
    """
    adapter_cls = Qwen3Embedding if model_name == QWEN3_EMBEDDING_MODEL else ModelWithPooling
    return adapter_cls(model_name)
 def test_sbert():
     m = SBert('bert-base-chinese')
     o = m('hello')

utils/pipeline.py CHANGED Viewed

@@ -3,7 +3,7 @@ from typing import List
 import pandas as pd
 from sentence_transformers.util import cos_sim
-from utils.models import ModelWithPooling
 def p0_originality(df: pd.DataFrame, model_name: str, pooling: str) -> pd.DataFrame:
@@ -15,7 +15,7 @@ def p0_originality(df: pd.DataFrame, model_name: str, pooling: str) -> pd.DataFr
     """
     assert 'prompt' in df.columns
     assert 'response' in df.columns
-    model = ModelWithPooling(model_name)
     def get_cos_sim(prompt: str, response: str) -> float:
         prompt_vec = model(text=prompt, pooling=pooling)
@@ -37,7 +37,7 @@ def p1_flexibility(df: pd.DataFrame, model_name: str, pooling: str) -> pd.DataFr
     assert 'prompt' in df.columns
     assert 'response' in df.columns
     assert 'id' in df.columns
-    model = ModelWithPooling(model_name)
     def get_flexibility(responses: List[str]) -> float:
         responses_vec = [model(text=_, pooling=pooling) for _ in responses]

 import pandas as pd
 from sentence_transformers.util import cos_sim
+from utils.models import get_embedding_model
 def p0_originality(df: pd.DataFrame, model_name: str, pooling: str) -> pd.DataFrame:
     """
     assert 'prompt' in df.columns
     assert 'response' in df.columns
+    model = get_embedding_model(model_name)
     def get_cos_sim(prompt: str, response: str) -> float:
         prompt_vec = model(text=prompt, pooling=pooling)
     assert 'prompt' in df.columns
     assert 'response' in df.columns
     assert 'id' in df.columns
+    model = get_embedding_model(model_name)
     def get_flexibility(responses: List[str]) -> float:
         responses_vec = [model(text=_, pooling=pooling) for _ in responses]