# BGE-image / app.py
# simler's picture
# Update app.py
# 3d360d7 verified
from fastapi import FastAPI, Request
from sentence_transformers import SentenceTransformer, util
import json
import torch
import os
# FastAPI application instance; the route decorators in this file register on it.
app = FastAPI()
# Similarity threshold: a best score that does not exceed this value is
# reported as "neutral" by the /match endpoint.
THRESHOLD = 0.35
print("正在加载 BGE-Large-ZH-v1.5...")
# Original author's note: the model is downloaded automatically at this point;
# if the log appears stuck here, wait patiently.
model = SentenceTransformer('BAAI/bge-large-zh-v1.5')
print("模型加载完成")
def load_data(path: str = 'emoji_labels.json'):
    """Load the labelled reference texts and precompute their embeddings.

    Args:
        path: Location of the JSON label file. The file is read as a list of
            objects; only each object's ``text`` entry is used here.

    Returns:
        A ``(data, embeddings)`` pair. ``data`` is the parsed JSON content and
        ``embeddings`` is the result of ``model.encode`` over every ``text``
        value. When the file is missing or holds no entries the pair is
        ``([], None)``, so callers can detect "nothing to match against" with
        a single ``is None`` check.
    """
    if not os.path.exists(path):
        print(f"警告: 找不到 {path}")
        return [], None

    with open(path, 'r', encoding='utf-8') as f:
        data = json.load(f)

    if not data:
        # Nothing to encode or match against: report it the same way as a
        # missing file instead of handing an empty batch to the encoder.
        print(f"警告: {path} 中没有任何数据")
        return [], None

    texts = [item['text'] for item in data]
    # Precompute the reference embeddings once so requests only encode the query.
    embeddings = model.encode(texts, normalize_embeddings=True, convert_to_tensor=True)
    return data, embeddings
# Initialize the data: load the labels and their embeddings once, at import time.
emoji_data, emoji_embeddings = load_data()
@app.get("/")
def home():
    """Root endpoint: return a fixed readiness banner."""
    banner = "Kouri 5-Emotion System Ready"
    return banner
@app.post("/match")
async def match_emoji(request: Request):
    """Match the posted text against the precomputed label embeddings.

    Request body: a JSON object with a ``text`` string.
    Response: ``{"label": <str>, "score": <float>}``, for example
    ``{"label": "happy", "score": 0.85}``.

    ``{"label": "neutral", "score": 0.0}`` is returned when the body is not a
    JSON object, when ``text`` is missing, not a string or blank, when no
    label data is loaded, or when anything raises while handling the request.
    When the best similarity does not exceed ``THRESHOLD`` the label is
    ``"neutral"`` and the score is the best similarity that was found.
    """
    try:
        body = await request.json()
        # A JSON array/number/string body has no "text" field to read.
        user_text = body.get("text", "") if isinstance(body, dict) else ""

        # Fallback: unusable input (missing, non-string or whitespace-only
        # text) or label data that was not loaded -> neutral.
        if (
            not isinstance(user_text, str)
            or not user_text.strip()
            or emoji_embeddings is None
            or not emoji_data
        ):
            return {"label": "neutral", "score": 0.0}

        # Build the query; the original author notes that BGE models
        # recommend adding a prefix. The text itself is passed through as sent.
        query_text = "为这个句子分类情感:" + user_text
        query_emb = model.encode(query_text, normalize_embeddings=True, convert_to_tensor=True)

        # Similarity of the query against every reference text.
        scores = util.cos_sim(query_emb, emoji_embeddings)[0]
        best_idx = int(torch.argmax(scores))
        best_score = float(scores[best_idx])
        matched_item = emoji_data[best_idx]

        # Log the match.
        print(f"输入: {user_text} | 匹配: {matched_item['label']} | 分数: {best_score:.4f}")

        # A score that does not exceed the threshold is too weak to trust,
        # so the label falls back to neutral while the score is still reported.
        label = matched_item['label'] if best_score > THRESHOLD else "neutral"
        return {"label": label, "score": best_score}
    except Exception as e:
        # Deliberate best-effort: any failure yields neutral so the service
        # keeps answering instead of returning an error response.
        print(f"Error: {e}")
        return {"label": "neutral", "score": 0.0}