File size: 3,803 Bytes
445db58
 
 
 
 
c5c5c63
 
 
 
 
 
 
5bd9f7f
 
 
 
fb8a941
 
63d234a
 
 
 
 
 
 
fb8a941
63d234a
fb8a941
63d234a
 
 
fb8a941
63d234a
 
 
fb8a941
63d234a
 
 
fb8a941
 
63d234a
fb8a941
63d234a
 
 
fb8a941
63d234a
 
 
fb8a941
63d234a
 
 
fb8a941
63d234a
 
fb8a941
63d234a
 
fb8a941
63d234a
 
 
fb8a941
63d234a
 
fb8a941
63d234a
 
fb8a941
63d234a
 
 
fb8a941
63d234a
 
 
 
 
 
fb8a941
63d234a
 
 
 
fb8a941
63d234a
5bd9f7f
63d234a
 
 
 
 
5bd9f7f
 
 
9b707b4
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
---
license: gpl-3.0
pipeline_tag: text-classification
tags:
- art

widget:
  - text: "牛犊初生敢问天,为官一任史无前。钎锤巧构蓝图景,岩壁砺磨钢铁肩。玉汝于成堪大智,红旗永艳有群贤。十风五雨千秋业,铸就惊天动地篇。"
  - text: "胎禽消息渺难知,小萼妆容故故迟。城郭渐随寒碧敛,湖山刚与晚阴宜,再来恐或成孤往,此去何由问所之。坐对空亭喧冻雀,可堪暝色向人垂。"
  - text: "异域风吹残帜斜,呜呼水木不清华。未闻史载分赃制,时见官乘夺路槎。有术掠民腾物价,无能让土息胡笳。两朝竭力推经济,遍地催开血色花。"
    

---

此模型的作用是对输入的简体七言律诗进行风格上的分类,详情见 https://mp.weixin.qq.com/s/P8FVCkI8-anDuLWQIAgs2w

使用方法如下:

```python
import torch
from transformers import AutoTokenizer, AutoModelForSequenceClassification
import json
import torch.nn.functional as F
from zhconv import convert
import re

# Model identifier handed to both `from_pretrained` calls below.
model_path = "qixun/qilv_classify"

# Load the tokenizer and the sequence-classification model.
tokenizer = AutoTokenizer.from_pretrained(model_path)
model = AutoModelForSequenceClassification.from_pretrained(model_path)

# Optional: uncomment to move the model to the GPU when one is available.
# device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
# model.to(device)

# Load the class-index -> label-name mapping; adjust the path to
# label_mapping.json to match your local setup.
with open("label_mapping.json", mode="r", encoding="utf-8") as mapping_file:
    label_mapping = json.loads(mapping_file.read())


def classify_text(text):
    """Classify the style of a seven-character regulated verse (qilv).

    The input is converted to simplified Chinese, stripped of all
    whitespace, validated, and then scored by the module-level ``model``.

    Args:
        text: The poem as a string, including punctuation. After whitespace
            removal it must be exactly 64 characters long (56 verse
            characters plus 8 punctuation marks).

    Returns:
        A string. If validation fails, a short Chinese message asking for a
        valid poem. Otherwise a multi-line report: the normalized text
        followed by the three most probable labels with their probabilities
        (four decimal places), one per line.

    Raises:
        KeyError: If a predicted class index is missing from
            ``label_mapping``.
    """
    # Normalize traditional characters to simplified Chinese.
    text = convert(text, 'zh-cn')
    # Remove every whitespace character (spaces, newlines, tabs, carriage
    # returns, full-width spaces) so pasted text is measured reliably.
    text = "".join(text.split())

    # A punctuated qilv is 8 lines x 7 characters + 8 punctuation marks = 64.
    if len(text) != 64:
        return "请输入一首带标点的七言律诗"

    # Reject degenerate input that repeats only a handful of CJK characters.
    unique_characters = set(re.findall(r'[\u4e00-\u9fff]', text))
    if len(unique_characters) < 30:
        return "请输入一首正常的七言律诗"

    # Tokenize into PyTorch tensors.
    inputs = tokenizer(text, padding=True, truncation=True, return_tensors="pt", max_length=512)

    # Optional: uncomment to move the inputs to the GPU alongside the model.
    # inputs = {key: value.to(device) for key, value in inputs.items()}

    # Inference only; gradients are not needed.
    with torch.no_grad():
        outputs = model(**inputs)

    # Turn the raw logits into per-class probabilities.
    probabilities = F.softmax(outputs.logits, dim=-1)

    # Keep the three most probable classes.
    top_k = 3
    top_probs, top_indices = torch.topk(probabilities, top_k, dim=-1)

    # Map class indices to label names; row 0 is the single input poem.
    results = [
        (label_mapping[str(index)], prob)
        for index, prob in zip(top_indices[0].tolist(), top_probs[0].tolist())
    ]

    # Format the report.
    lines = [f"文本: {text}\n"]
    lines.extend(f"分类: {label}, 概率: {prob:.4f}\n" for label, prob in results)
    return "".join(lines)

# Example call: classify one sample poem and print the report.
text = (
    "胎禽消息渺难知,小萼妆容故故迟。城郭渐随寒碧敛,湖山刚与晚阴宜,"
    "再来恐或成孤往,此去何由问所之。坐对空亭喧冻雀,可堪暝色向人垂。"
)
result = classify_text(text)
print(result)
```





也可以直接在 Hugging Face 模型页面输入一首含标点共 64 个字符的简体七言律诗进行测试。label_mapping.json 的内容如下:

    {
        "0": "中唐",
        "1": "乱码",
        "2": "冲塔",
        "3": "同光",
        "4": "复兴",
        "5": "实验",
        "6": "晚唐",
        "7": "江西",
        "8": "浙",
        "9": "浣花",
        "10": "理学",
        "11": "盛唐",
        "12": "艳体",
        "13": "诗界xx",
        "14": "赣",
        "15": "闽"
    }

请自行将以上内容保存为 label_mapping.json 文件。