fengxb30 committed
Commit 16b0746 · verified · 1 Parent(s): 776d8e5

Delete FinGPT_TaskII_Submission/scripts/task_2_evaluate.py

FinGPT_TaskII_Submission/scripts/task_2_evaluate.py DELETED
@@ -1,187 +0,0 @@
- #!/usr/bin/env python3
- """
- Task 2: Financial Sentiment Analysis - Starter Kit
- SecureFinAI Contest 2025
-
- Example script that loads the FPB dataset and the Fin-o1-8B model and evaluates the model on FPB.
- We will evaluate the submitted models using similar scripts with different dataset settings.
- """
-
-
- import torch
- import os
- import sys
- from contextlib import redirect_stderr, redirect_stdout
- from io import StringIO
- from datasets import load_dataset
- from transformers import AutoTokenizer, AutoModelForCausalLM, BitsAndBytesConfig, pipeline
- # PEFT is needed to load the LoRA adapter
- from peft import PeftModel
- from sklearn.metrics import accuracy_score
- import warnings
-
- # Suppress warnings
- warnings.filterwarnings('ignore')
-
- # Set logging levels
- import logging
-
- logging.getLogger("transformers").setLevel(logging.ERROR)
- logging.getLogger("transformers.generation_utils").setLevel(logging.ERROR)
-
-
- ADAPTER_PATH = "FinLoRA/axolotl-output/finai_fino1_8b_8bits_r8_lora"
-
-
- def setup_model():
-     print(f"Loading Fin-o1-8B base model and adapter from {ADAPTER_PATH}...")
-
-     # Check that the adapter exists
-     if not os.path.exists(ADAPTER_PATH):
-         print(f"Error: Adapter path not found at {ADAPTER_PATH}")
-         print("Please run 'try.py' first to train the adapter.")
-         sys.exit(1)
-
-     # Completely suppress output during model loading
-     with redirect_stdout(StringIO()), redirect_stderr(StringIO()):
-         quantization_config = BitsAndBytesConfig(
-             load_in_4bit=True,
-             bnb_4bit_compute_dtype=torch.float16,
-             bnb_4bit_quant_type="nf4",
-             bnb_4bit_use_double_quant=True,
-         )
-
-         # --- Change 4: the base model ID must match the one used when training in try.py ---
-         model_id = "The-FinAI/Fin-o1-8B"
-
-         tokenizer = AutoTokenizer.from_pretrained(
-             model_id,
-             trust_remote_code=True,
-             padding_side="left"
-         )
-
-         if tokenizer.pad_token is None:
-             tokenizer.pad_token = tokenizer.eos_token
-
-         # --- Change 5: load the base model first ---
-         base_model = AutoModelForCausalLM.from_pretrained(
-             model_id,
-             quantization_config=quantization_config,
-             device_map="auto",
-             trust_remote_code=True,
-             torch_dtype=torch.float16,
-         )
-
-     # --- Change 6: apply the LoRA adapter to the base model ---
-     print("Loading adapter...")  # printed outside the silenced block, for debugging
-     model = PeftModel.from_pretrained(base_model, ADAPTER_PATH)
-     print("Adapter loaded.")
-
-     text_generator = pipeline(
-         "text-generation",
-         model=model,
-         tokenizer=tokenizer,
-         max_new_tokens=10,
-         do_sample=False,
-         pad_token_id=tokenizer.eos_token_id,
-         return_full_text=False
-     )
-
-     # The status message is printed outside the silenced block
-     print("Fine-tuned model (Base + Adapter) loaded!")
-     return text_generator
-
-
- def predict_sentiment(text, text_generator):
-     """Predict the sentiment of a single text as positive, negative, or neutral."""
-     prompt = f"""Analyze the sentiment of this statement extracted from a financial news article. Provide your answer as either negative, positive, or neutral.
-
- Text: {text}
-
- Answer:"""
-
-     try:
-         # Temporarily suppress stdout/stderr
-         with redirect_stdout(StringIO()), redirect_stderr(StringIO()):
-             outputs = text_generator(prompt)
-
-         # Since return_full_text=False, we get only the generated part
-         response = outputs[0]['generated_text'].strip().lower()
-
-         if "positive" in response:
-             return "positive"
-         elif "negative" in response:
-             return "negative"
-         else:
-             return "neutral"
-     except Exception:
-         return "neutral"
-
-
- def main():
-     print("=== Task 2: Financial Sentiment Analysis (Evaluating Fine-Tuned Adapter) ===")
-
-     # Load dataset
-     print("Loading FPB dataset...")
-     dataset = load_dataset("ChanceFocus/en-fpb")
-     print(f"Dataset: train={len(dataset['train'])}, test={len(dataset['test'])}")
-
-     # Setup model
-     text_generator = setup_model()
-
-     # Demo on 3 samples
-     print("\n--- Demo Samples ---")
-     label_names = ['positive', 'neutral', 'negative']
-
-     for i in range(3):
-         sample = dataset['test'][i]
-         text = sample['text']
-         true_label = label_names[sample['gold']]
-
-         predicted = predict_sentiment(text, text_generator)
-         correct = "✓" if predicted == true_label else "✗"
-
-         print(f"\nSample {i + 1}: {correct}")
-         print(f"Text: {text[:80]}...")
-         print(f"True: {true_label} | Predicted: {predicted}")
-
-     # Evaluate on full test set
-     test_size = len(dataset['test'])
-     print(f"\n--- Evaluating {test_size} samples ---")
-     predictions = []
-     true_labels = []
-
-     for i in range(test_size):
-         sample = dataset['test'][i]
-         text = sample['text']
-         true_label = label_names[sample['gold']]
-
-         predicted = predict_sentiment(text, text_generator)
-         predictions.append(predicted)
-         true_labels.append(true_label)
-
-         if i % 50 == 0:
-             print(f"Processed {i + 1}/{test_size}...")
-
-     # Results
-     accuracy = accuracy_score(true_labels, predictions)
-     print(f"\nAccuracy: {accuracy:.3f} ({accuracy * 100:.1f}%)")
-
-     # Count by label
-     correct_pos = sum(1 for t, p in zip(true_labels, predictions) if t == p == 'positive')
-     correct_neu = sum(1 for t, p in zip(true_labels, predictions) if t == p == 'neutral')
-     correct_neg = sum(1 for t, p in zip(true_labels, predictions) if t == p == 'negative')
-
-     total_pos = true_labels.count('positive')
-     total_neu = true_labels.count('neutral')
-     total_neg = true_labels.count('negative')
-
-     print(f"Positive: {correct_pos}/{total_pos}")
-     print(f"Neutral: {correct_neu}/{total_neu}")
-     print(f"Negative: {correct_neg}/{total_neg}")
-
-     print("\nDemo completed!")
-
-
- if __name__ == "__main__":
-     main()
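
For reference, the evaluation protocol in the deleted script reduces to: map each example's gold index through the label order ['positive', 'neutral', 'negative'], ask the model for a one-word sentiment, and score with scikit-learn's accuracy_score. Below is a minimal, self-contained sketch of that harness that accepts any predictor; evaluate_fpb and predict_fn are hypothetical names introduced here for illustration, and the dataset fields and label order are assumed to match what the deleted script used.

from datasets import load_dataset
from sklearn.metrics import accuracy_score

# Label order assumed for the 'gold' field of ChanceFocus/en-fpb, as in the deleted script.
LABEL_NAMES = ["positive", "neutral", "negative"]

def evaluate_fpb(predict_fn, split="test", limit=None):
    """Score any text -> 'positive'/'neutral'/'negative' predictor on the FPB split."""
    dataset = load_dataset("ChanceFocus/en-fpb", split=split)
    if limit is not None:
        dataset = dataset.select(range(limit))

    true_labels = [LABEL_NAMES[example["gold"]] for example in dataset]
    predictions = [predict_fn(example["text"]) for example in dataset]

    accuracy = accuracy_score(true_labels, predictions)
    # Per-label (correct, total) counts, mirroring the summary the script printed.
    per_label = {
        name: (
            sum(1 for t, p in zip(true_labels, predictions) if t == p == name),
            true_labels.count(name),
        )
        for name in LABEL_NAMES
    }
    return accuracy, per_label

if __name__ == "__main__":
    # Trivial always-neutral baseline, used only to exercise the harness.
    acc, per_label = evaluate_fpb(lambda text: "neutral", limit=50)
    print(f"Accuracy: {acc:.3f}")
    for name, (correct, total) in per_label.items():
        print(f"{name}: {correct}/{total}")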