# Hugging Face Space app (page status banner "Spaces: Sleeping" removed — scrape residue).
from transformers import pipeline, AutoTokenizer, AutoModelForCausalLM

# Fine-tuned Qwen 1.5 model used to translate emojis into Chinese text.
# NOTE(review): trust_remote_code=True executes custom code shipped with the
# checkpoint repo — only keep this for repos you trust.
emoji_translator = pipeline(
    "text-generation",
    model="JenniferHJF/qwen1.5-emoji-finetuned",
    tokenizer="JenniferHJF/qwen1.5-emoji-finetuned",
    max_new_tokens=20,
    trust_remote_code=True,
)
# Offensive-language classification model (can be swapped for a larger LLM
# such as ChatGLM3 or DeepSeek).
offensive_classifier = pipeline(
    "text-classification",
    model="s-nlp/roberta-offensive-language-detection",  # example model; replaceable
)
# Unified prediction function
def classify_text_with_emoji(raw_text, translator=None, classifier=None):
    """Translate emojis in *raw_text* to Chinese, then classify offensiveness.

    Args:
        raw_text: Input text, possibly containing emojis.
        translator: Optional text-generation pipeline; defaults to the
            module-level ``emoji_translator``. (Keyword added for testability;
            omitting it preserves the original behavior.)
        classifier: Optional text-classification pipeline; defaults to the
            module-level ``offensive_classifier``.

    Returns:
        Tuple ``(translated_text, label, score)``.
    """
    translator = emoji_translator if translator is None else translator
    classifier = offensive_classifier if classifier is None else classifier

    # Step 1: emoji -> Chinese via the fine-tuned generator.
    prompt = f"输入:{raw_text}\n输出:"
    converted = translator(prompt)[0]["generated_text"]

    # text-generation pipelines echo the prompt in generated_text; drop it so
    # the "输出:" prefix does not leak into the translated result (the original
    # splitlines()[-1] kept it).
    generated = converted.removeprefix(prompt)

    # Keep the last non-empty line to avoid generated prefix lines; fall back
    # to the raw input instead of raising IndexError on an empty generation.
    lines = [ln for ln in generated.strip().splitlines() if ln.strip()]
    translated_text = lines[-1] if lines else raw_text

    # Step 2: offensiveness classification on the translated text.
    result = classifier(translated_text)[0]
    return translated_text, result["label"], result["score"]