File size: 2,466 Bytes
557b717
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
50ac6f1
 
557b717
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
import gradio as gr
from transformers import AutoTokenizer, AutoModelForSequenceClassification
import torch

# ============== 1. Load the model and tokenizer ==============
# On Hugging Face Spaces, when the model files live in the same directory as
# app.py, "." can be used directly as the model path.
MODEL_PATH = "."

# Prefer the GPU when one is available; otherwise fall back to the CPU.
if torch.cuda.is_available():
    device = "cuda"
else:
    device = "cpu"

# Load the tokenizer and the classification model from the local files,
# then place the model on the selected device.
tokenizer = AutoTokenizer.from_pretrained(MODEL_PATH)
model = AutoModelForSequenceClassification.from_pretrained(MODEL_PATH)
model.to(device)

# ============== 2. Label mapping (corrected to match the dataset) ==============
# Maps the model's class index to the label text shown in the UI.
id2label = {
    0: "非灾难相关",  # the tweet is unrelated to a disaster
    1: "真实灾难",  # the tweet is about a real disaster
}


# ============== 3. Define the inference function ==============
def classify_text(text):
    """Classify a tweet and return the probability of each label.

    Args:
        text: The tweet text to classify. ``None``, an empty string, or a
            whitespace-only string is treated as "nothing to classify".

    Returns:
        A dict mapping each label name from ``id2label`` to its softmax
        probability as a Python float. An empty dict is returned when there
        is no text to classify.
    """
    # Guard against blank submissions: running the model on an empty string
    # would still produce a confident-looking but meaningless prediction.
    if text is None or not text.strip():
        return {}

    # Tokenize the single input; anything beyond 512 tokens is truncated.
    encoded = tokenizer(text, return_tensors="pt", truncation=True, max_length=512, padding=True)

    # The input tensors must live on the same device as the model.
    encoded = {name: tensor.to(device) for name, tensor in encoded.items()}

    # Inference only, so gradient tracking is disabled for the forward pass.
    with torch.no_grad():
        logits = model(**encoded).logits

    # Select the single batch row explicitly. A bare squeeze() would also
    # collapse the class dimension if it ever had size 1.
    probabilities = torch.softmax(logits, dim=1)[0].cpu().tolist()

    # Pair every class index with its human-readable label.
    return {id2label[index]: float(prob) for index, prob in enumerate(probabilities)}

# ============== 4. Build and launch the Gradio interface ==============
# Sample tweets offered as clickable examples in the UI.
_example_tweets = [
    ["Forest fire near La Ronge Sask. Canada"],  # a real disaster
    ["Just got a new job! This is the bomb!"],  # not a disaster, even though it uses the word "bomb"
    ["My house is on fire!"],  # a real disaster
    ["I love my cat."],  # not a disaster
]

# Description text for the interface; split only for line length, the
# concatenated value is a single string.
_description = (
    "This is a text classification model based on BERT, which is used to "
    "determine whether a tweet describes a real disaster event. "
    "The model is trained on the 'nlp-getting-started' dataset from Kaggle."
)

demo = gr.Interface(
    title="Tweet Disaster Classifier",
    description=_description,
    fn=classify_text,
    inputs=gr.Textbox(lines=5, placeholder="在这里输入一条英文推文..."),
    outputs=gr.Label(num_top_classes=2),
    examples=_example_tweets,
)

# Start the web app only when this file is executed as a script.
if __name__ == "__main__":
    demo.launch()