Spaces:

simler
/

BGE-image

Runtime error

App Files Community

simler committed on Jan 25

Commit

a3d18a3

verified ·

1 Parent(s): db8d7e5

Update app.py

Browse files

Files changed (1) hide show

app.py +28 -47

app.py CHANGED Viewed

@@ -6,85 +6,66 @@ import os
 app = FastAPI()
-# ================= 配置区域 =================
-# 匹配阈值 (建议 0.4 - 0.5)
-# BGE 模型的相似度分布通常在 0.6-1.0 之间，所以阈值要设高一点
-THRESHOLD = 0.38
-print("正在加载 BGE-Large-ZH-v1.5 (中文最强模型)...")
-# 替换为 BAAI/bge-large-zh-v1.5
-# 第一次启动下载需要几十秒，请耐心等待 Space 状态变绿
 model = SentenceTransformer('BAAI/bge-large-zh-v1.5')
-print("模型加载完成！")
-# ================= 数据预处理 =================
-def load_and_encode_data():
     if not os.path.exists('emoji_labels.json'):
-        print("错误: 找不到 emoji_labels.json")
         return [], None
     with open('emoji_labels.json', 'r', encoding='utf-8') as f:
         data = json.load(f)
     texts = [item['text'] for item in data]
-    # BGE 模型建议在查询前加指令，但在这种对称匹配场景下，直接 encode 效果也很好
-    # 预先计算库中标签的向量
     embeddings = model.encode(texts, normalize_embeddings=True, convert_to_tensor=True)
     return data, embeddings
-# 初始化数据
-emoji_data, emoji_embeddings = load_and_encode_data()
-# ================= API 接口 =================
 @app.get("/")
 def home():
-    return {"status": "Kouri BGE-Large API is running"}
 @app.post("/match")
 async def match_emoji(request: Request):
     try:
         body = await request.json()
         user_text = body.get("text", "")
         if not user_text or emoji_embeddings is None:
-            return {"label": None}
-        # BGE 模型的小技巧：给查询文本加个指令前缀，效果会更精准
-        # 意思就是告诉模型：“帮我为这句话生成个表示，用来找对应的标签”
-        query_instruction = "为这个句子生成表示以用于检索相关标签："
-        query_text = query_instruction + user_text
-        # 1. 计算用户输入的向量
         query_emb = model.encode(query_text, normalize_embeddings=True, convert_to_tensor=True)
-        # 2. 计算相似度
         scores = util.cos_sim(query_emb, emoji_embeddings)[0]
-        # 3. 找到得分最高的
         best_score = float(torch.max(scores))
         best_idx = int(torch.argmax(scores))
-        matched_item = emoji_data[best_idx]
-        # 4. 打印日志方便你在 HF 后台看
-        print(f"用户输入: {user_text}")
-        print(f"最高匹配: {matched_item['label']} ({matched_item['text']}) - 得分: {best_score:.4f}")
         if best_score > THRESHOLD:
-            return {
-                "label": matched_item['label'],
-                "score": best_score,
-                "matched_text": matched_item['text']
-            }
         else:
-            return {
-                "label": None,
-                "score": best_score,
-                "reason": "low_confidence"
-            }
     except Exception as e:
-        return {"error": str(e)}

 app = FastAPI()
+# 阈值设定
+# 如果用户说的话跟5种情绪都不沾边，就返回 neutral
+THRESHOLD = 0.35
+print("正在加载 BGE-Large-ZH-v1.5...")
 model = SentenceTransformer('BAAI/bge-large-zh-v1.5')
+print("模型加载完成")
+# 加载数据
+def load_data():
     if not os.path.exists('emoji_labels.json'):
         return [], None
     with open('emoji_labels.json', 'r', encoding='utf-8') as f:
         data = json.load(f)
     texts = [item['text'] for item in data]
+    # 预计算向量
     embeddings = model.encode(texts, normalize_embeddings=True, convert_to_tensor=True)
     return data, embeddings
+emoji_data, emoji_embeddings = load_data()
 @app.get("/")
 def home():
+    return "Kouri 5-Emotion System Ready"
 @app.post("/match")
 async def match_emoji(request: Request):
+    """
+    不管输入什么，只返回 5 种标签之一。
+    格式示例: {"tag": "[happy]"}
+    """
     try:
         body = await request.json()
         user_text = body.get("text", "")
+        # 兜底：空输入返回中立
         if not user_text or emoji_embeddings is None:
+            return {"tag": "[neutral]"}
+        # 构造查询指令
+        query_text = "为这个句子分类情感：" + user_text
         query_emb = model.encode(query_text, normalize_embeddings=True, convert_to_tensor=True)
+        # 计算相似度
         scores = util.cos_sim(query_emb, emoji_embeddings)[0]
         best_score = float(torch.max(scores))
         best_idx = int(torch.argmax(scores))
+        # 获取标签 (例如 "[happy]")
+        matched_label = emoji_data[best_idx]['label']
+        # 打印日志方便调试
+        print(f"输入: {user_text} | 匹配: {matched_label} | 分数: {best_score:.4f}")
         if best_score > THRESHOLD:
+            return {"tag": matched_label}
         else:
+            # 没匹配上也返回中立，保证稳定性
+            return {"tag": "[neutral]"}
     except Exception as e:
+        print(f"Error: {e}")
+        return {"tag": "[neutral]"}