# llm/app.py — Hugging Face Space "llm" by YLXS (commit 50ac6f1)
import gradio as gr
from transformers import AutoTokenizer, AutoModelForSequenceClassification
import torch
# ============== 1. Load the model and tokenizer ==============
# On Hugging Face Spaces, "." resolves correctly when the model files live
# in the same directory as app.py.
MODEL_PATH = "."
# Restore the tokenizer and the fine-tuned classifier from local files.
tokenizer = AutoTokenizer.from_pretrained(MODEL_PATH)
model = AutoModelForSequenceClassification.from_pretrained(MODEL_PATH)
# Pick the best available device (GPU if present) and move the model onto it.
if torch.cuda.is_available():
    device = "cuda"
else:
    device = "cpu"
model = model.to(device)
# ============== 2. Label mapping (matches the dataset) ==============
# 0: the tweet is unrelated to any disaster
# 1: the tweet describes a real disaster
id2label = {0: "非灾难相关", 1: "真实灾难"}
# ============== 3. 定义推理函数 ==============
def classify_text(text):
    """Classify a tweet and return label-to-probability scores.

    Args:
        text: Raw tweet text (the underlying model expects English).

    Returns:
        Dict mapping each human-readable label to its softmax probability,
        in the shape Gradio's Label component expects.
    """
    # Tokenize, truncating long inputs to the model's 512-token limit.
    encoded = tokenizer(text, return_tensors="pt", truncation=True, max_length=512, padding=True)
    # Keep the input tensors on the same device as the model weights.
    encoded = {name: tensor.to(device) for name, tensor in encoded.items()}
    # Forward pass without building the autograd graph.
    with torch.no_grad():
        raw_scores = model(**encoded).logits
    # Softmax over the class dimension -> 1-D probability vector on the CPU.
    probs = raw_scores.softmax(dim=1).squeeze().cpu().numpy()
    # Pair each class index with its display label.
    return {id2label[idx]: float(p) for idx, p in enumerate(probs)}
# ============== 4. Build and launch the Gradio UI ==============
# Sample tweets shown under the input box; each inner list is one example row.
_EXAMPLES = [
    ["Forest fire near La Ronge Sask. Canada"],  # a genuine disaster tweet
    ["Just got a new job! This is the bomb!"],   # non-disaster despite the word "bomb"
    ["My house is on fire!"],                    # genuine disaster
    ["I love my cat."],                          # non-disaster
]

demo = gr.Interface(
    fn=classify_text,
    inputs=gr.Textbox(lines=5, placeholder="在这里输入一条英文推文..."),
    outputs=gr.Label(num_top_classes=2),
    title="Tweet Disaster Classifier",
    description=(
        "This is a text classification model based on BERT, which is used to "
        "determine whether a tweet describes a real disaster event. The model is "
        "trained on the 'nlp-getting-started' dataset from Kaggle."
    ),
    examples=_EXAMPLES,
)

if __name__ == "__main__":
    demo.launch()