import gradio as gr
from transformers import AutoTokenizer, AutoModelForSequenceClassification
import torch

# ============== 1. Load the model and tokenizer ==============
# On Hugging Face Spaces, if the model files live in the same directory as
# app.py, the path can simply be ".".
MODEL_PATH = "."

# Load the tokenizer and model from local files
tokenizer = AutoTokenizer.from_pretrained(MODEL_PATH)
model = AutoModelForSequenceClassification.from_pretrained(MODEL_PATH)

# Move the model to the available device (GPU or CPU)
device = "cuda" if torch.cuda.is_available() else "cpu"
model.to(device)

# ============== 2. Define the label mapping (corrected to match the dataset) ==============
# 0: the tweet is not about a disaster
# 1: the tweet describes a real disaster
id2label = {0: "Not disaster-related", 1: "Real disaster"}

# ============== 3. Define the inference function ==============
def classify_text(text):
    """
    Takes a text input and returns a dict mapping each label to its probability.
    """
    # Tokenize the input text
    inputs = tokenizer(text, return_tensors="pt", truncation=True, max_length=512, padding=True)
    # Move the tokenized tensors to the same device as the model
    inputs = {key: val.to(device) for key, val in inputs.items()}

    # Forward pass without gradient tracking
    with torch.no_grad():
        logits = model(**inputs).logits

    # Convert logits to probabilities with softmax
    probabilities = torch.softmax(logits, dim=1).squeeze().cpu().numpy()

    # Build the result dict: label name -> probability
    confidences = {id2label[i]: float(prob) for i, prob in enumerate(probabilities)}
    return confidences

# ============== 4. Create and launch the Gradio interface ==============
demo = gr.Interface(
    fn=classify_text,
    inputs=gr.Textbox(lines=5, placeholder="Enter an English tweet here..."),
    outputs=gr.Label(num_top_classes=2),
    title="Tweet Disaster Classifier",
    description="This is a text classification model based on BERT, which is used to determine whether a tweet describes a real disaster event. The model is trained on the 'nlp-getting-started' dataset from Kaggle.",
    examples=[
        ["Forest fire near La Ronge Sask. Canada"],  # a real-disaster example
        ["Just got a new job! This is the bomb!"],   # non-disaster, despite the word "bomb"
        ["My house is on fire!"],                    # real disaster
        ["I love my cat."]                           # non-disaster
    ]
)

if __name__ == "__main__":
    demo.launch()
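
# A minimal sketch of how a client could query this app once the Space is
# deployed, using the gradio_client package. Kept commented out so app.py
# stays runnable as-is; the Space ID below is a hypothetical placeholder,
# not the actual deployment. For a gr.Interface, the endpoint defaults to
# api_name="/predict".
#
#   from gradio_client import Client
#
#   client = Client("username/tweet-disaster-classifier")  # hypothetical Space ID
#   result = client.predict(
#       "Forest fire near La Ronge Sask. Canada",
#       api_name="/predict",
#   )
#   print(result)  # label/confidence output produced by classify_text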