AISkywalker committed on
Commit 38d8dc2 · verified · 1 Parent(s): fb31fe9

Upload 30 files

DS_LoRA/README.md ADDED
@@ -0,0 +1,202 @@
+ ---
+ base_model: deepseek-ai/DeepSeek-R1-Distill-Qwen-1.5B
+ library_name: peft
+ ---
+
+ # Model Card for Model ID
+
+ <!-- Provide a quick summary of what the model is/does. -->
+
+
+
+ ## Model Details
+
+ ### Model Description
+
+ <!-- Provide a longer summary of what this model is. -->
+
+
+
+ - **Developed by:** [More Information Needed]
+ - **Funded by [optional]:** [More Information Needed]
+ - **Shared by [optional]:** [More Information Needed]
+ - **Model type:** [More Information Needed]
+ - **Language(s) (NLP):** [More Information Needed]
+ - **License:** [More Information Needed]
+ - **Finetuned from model [optional]:** [More Information Needed]
+
+ ### Model Sources [optional]
+
+ <!-- Provide the basic links for the model. -->
+
+ - **Repository:** [More Information Needed]
+ - **Paper [optional]:** [More Information Needed]
+ - **Demo [optional]:** [More Information Needed]
+
+ ## Uses
+
+ <!-- Address questions around how the model is intended to be used, including the foreseeable users of the model and those affected by the model. -->
+
+ ### Direct Use
+
+ <!-- This section is for the model use without fine-tuning or plugging into a larger ecosystem/app. -->
+
+ [More Information Needed]
+
+ ### Downstream Use [optional]
+
+ <!-- This section is for the model use when fine-tuned for a task, or when plugged into a larger ecosystem/app -->
+
+ [More Information Needed]
+
+ ### Out-of-Scope Use
+
+ <!-- This section addresses misuse, malicious use, and uses that the model will not work well for. -->
+
+ [More Information Needed]
+
+ ## Bias, Risks, and Limitations
+
+ <!-- This section is meant to convey both technical and sociotechnical limitations. -->
+
+ [More Information Needed]
+
+ ### Recommendations
+
+ <!-- This section is meant to convey recommendations with respect to the bias, risk, and technical limitations. -->
+
+ Users (both direct and downstream) should be made aware of the risks, biases, and limitations of the model. More information is needed for further recommendations.
+
+ ## How to Get Started with the Model
+
+ Use the code below to get started with the model.
+
+ [More Information Needed]
+
+ ## Training Details
+
+ ### Training Data
+
+ <!-- This should link to a Dataset Card, perhaps with a short stub of information on what the training data is all about as well as documentation related to data pre-processing or additional filtering. -->
+
+ [More Information Needed]
+
+ ### Training Procedure
+
+ <!-- This relates heavily to the Technical Specifications. Content here should link to that section when it is relevant to the training procedure. -->
+
+ #### Preprocessing [optional]
+
+ [More Information Needed]
+
+
+ #### Training Hyperparameters
+
+ - **Training regime:** [More Information Needed] <!--fp32, fp16 mixed precision, bf16 mixed precision, bf16 non-mixed precision, fp16 non-mixed precision, fp8 mixed precision -->
+
+ #### Speeds, Sizes, Times [optional]
+
+ <!-- This section provides information about throughput, start/end time, checkpoint size if relevant, etc. -->
+
+ [More Information Needed]
+
+ ## Evaluation
+
+ <!-- This section describes the evaluation protocols and provides the results. -->
+
+ ### Testing Data, Factors & Metrics
+
+ #### Testing Data
+
+ <!-- This should link to a Dataset Card if possible. -->
+
+ [More Information Needed]
+
+ #### Factors
+
+ <!-- These are the things the evaluation is disaggregating by, e.g., subpopulations or domains. -->
+
+ [More Information Needed]
+
+ #### Metrics
+
+ <!-- These are the evaluation metrics being used, ideally with a description of why. -->
+
+ [More Information Needed]
+
+ ### Results
+
+ [More Information Needed]
+
+ #### Summary
+
+
+
+ ## Model Examination [optional]
+
+ <!-- Relevant interpretability work for the model goes here -->
+
+ [More Information Needed]
+
+ ## Environmental Impact
+
+ <!-- Total emissions (in grams of CO2eq) and additional considerations, such as electricity usage, go here. Edit the suggested text below accordingly -->
+
+ Carbon emissions can be estimated using the [Machine Learning Impact calculator](https://mlco2.github.io/impact#compute) presented in [Lacoste et al. (2019)](https://arxiv.org/abs/1910.09700).
+
+ - **Hardware Type:** [More Information Needed]
+ - **Hours used:** [More Information Needed]
+ - **Cloud Provider:** [More Information Needed]
+ - **Compute Region:** [More Information Needed]
+ - **Carbon Emitted:** [More Information Needed]
+
+ ## Technical Specifications [optional]
+
+ ### Model Architecture and Objective
+
+ [More Information Needed]
+
+ ### Compute Infrastructure
+
+ [More Information Needed]
+
+ #### Hardware
+
+ [More Information Needed]
+
+ #### Software
+
+ [More Information Needed]
+
+ ## Citation [optional]
+
+ <!-- If there is a paper or blog post introducing the model, the APA and Bibtex information for that should go in this section. -->
+
+ **BibTeX:**
+
+ [More Information Needed]
+
+ **APA:**
+
+ [More Information Needed]
+
+ ## Glossary [optional]
+
+ <!-- If relevant, include terms and calculations in this section that can help readers understand the model or model card. -->
+
+ [More Information Needed]
+
+ ## More Information [optional]
+
+ [More Information Needed]
+
+ ## Model Card Authors [optional]
+
+ [More Information Needed]
+
+ ## Model Card Contact
+
+ [More Information Needed]
+
+ ### Framework versions
+
+ - PEFT 0.15.0
DS_LoRA/adapter_config.json ADDED
@@ -0,0 +1,34 @@
+ {
+   "alpha_pattern": {},
+   "auto_mapping": null,
+   "base_model_name_or_path": "deepseek-ai/DeepSeek-R1-Distill-Qwen-1.5B",
+   "bias": "none",
+   "corda_config": null,
+   "eva_config": null,
+   "exclude_modules": null,
+   "fan_in_fan_out": false,
+   "inference_mode": true,
+   "init_lora_weights": true,
+   "layer_replication": null,
+   "layers_pattern": null,
+   "layers_to_transform": null,
+   "loftq_config": {},
+   "lora_alpha": 32,
+   "lora_bias": false,
+   "lora_dropout": 0.1,
+   "megatron_config": null,
+   "megatron_core": "megatron.core",
+   "modules_to_save": null,
+   "peft_type": "LORA",
+   "r": 8,
+   "rank_pattern": {},
+   "revision": null,
+   "target_modules": [
+     "q_proj",
+     "v_proj"
+   ],
+   "task_type": "CAUSAL_LM",
+   "trainable_token_indices": null,
+   "use_dora": false,
+   "use_rslora": false
+ }
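
The config above applies rank-8 LoRA updates (alpha 32, dropout 0.1) to the `q_proj` and `v_proj` attention projections of the DeepSeek-R1-Distill-Qwen-1.5B base. A minimal loading sketch, assuming this repository has been cloned so that `DS_LoRA/` is a local directory:

```python
# Minimal sketch: attach the DS_LoRA adapter to its base model.
from transformers import AutoModelForCausalLM, AutoTokenizer
from peft import PeftModel

base_id = "deepseek-ai/DeepSeek-R1-Distill-Qwen-1.5B"  # from adapter_config.json above
base = AutoModelForCausalLM.from_pretrained(base_id)
tokenizer = AutoTokenizer.from_pretrained(base_id)

# "DS_LoRA" is the adapter directory added in this commit.
model = PeftModel.from_pretrained(base, "DS_LoRA")
model.eval()
```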
DS_LoRA/adapter_model.safetensors ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:1b03770158458f7f2108bad02721e94cba1a8935ebe9e81038129be01a5f03bc
+ size 4372840
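
The file above is a Git LFS pointer, not the weights themselves; the actual 4,372,840-byte safetensors blob is resolved from LFS at download time. One way to fetch it programmatically, with a placeholder repo id (the real id is not part of this diff):

```python
from huggingface_hub import hf_hub_download

# "<user>/<repo>" is a placeholder for this repository's Hub id.
path = hf_hub_download(
    repo_id="<user>/<repo>",
    filename="DS_LoRA/adapter_model.safetensors",
)
print(path)  # local cache path of the resolved weights file
```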
DS_RL_model/README.md ADDED
@@ -0,0 +1,202 @@
+ ---
+ base_model: deepseek-ai/DeepSeek-R1-Distill-Qwen-1.5B
+ library_name: peft
+ ---
+
+ # Model Card for Model ID
+
+ <!-- Unfilled auto-generated PEFT model card; its body is identical to DS_LoRA/README.md above. -->
+
+ ### Framework versions
+
+ - PEFT 0.15.1
DS_RL_model/adapter_config.json ADDED
@@ -0,0 +1,38 @@
+ {
+   "alpha_pattern": {},
+   "auto_mapping": {
+     "base_model_class": "PeftModelForCausalLM",
+     "parent_library": "peft.peft_model"
+   },
+   "base_model_name_or_path": null,
+   "bias": "none",
+   "corda_config": null,
+   "eva_config": null,
+   "exclude_modules": null,
+   "fan_in_fan_out": false,
+   "inference_mode": true,
+   "init_lora_weights": true,
+   "layer_replication": null,
+   "layers_pattern": null,
+   "layers_to_transform": null,
+   "loftq_config": {},
+   "lora_alpha": 16,
+   "lora_bias": false,
+   "lora_dropout": 0.0,
+   "megatron_config": null,
+   "megatron_core": "megatron.core",
+   "modules_to_save": null,
+   "peft_type": "LORA",
+   "r": 8,
+   "rank_pattern": {},
+   "revision": null,
+   "target_modules": [
+     "k_proj",
+     "q_proj",
+     "v_proj"
+   ],
+   "task_type": null,
+   "trainable_token_indices": null,
+   "use_dora": false,
+   "use_rslora": false
+ }
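
In this config `base_model_name_or_path` and `task_type` are `null` (the adapter was saved through `auto_mapping` as a generic `PeftModelForCausalLM`), so the base model cannot be inferred from the adapter alone. A minimal loading sketch, assuming the same DeepSeek base model as DS_LoRA above (an assumption, not stated in this file):

```python
# Minimal sketch: the base model must be supplied explicitly because
# base_model_name_or_path is null in this adapter's config.
from transformers import AutoModelForCausalLM
from peft import PeftModel

# Assumed base; substitute the checkpoint actually used in training.
base = AutoModelForCausalLM.from_pretrained(
    "deepseek-ai/DeepSeek-R1-Distill-Qwen-1.5B"
)
model = PeftModel.from_pretrained(base, "DS_RL_model")  # r=8, alpha=16, k/q/v_proj
```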
DS_RL_model/adapter_model.safetensors ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:92cda569f4ae4f21ecbd0790b623428f349855221041023c8f578c0b188d6ac2
+ size 5988672
Qwen_CoT_LoRA/README.md ADDED
@@ -0,0 +1,202 @@
+ ---
+ base_model: Qwen/Qwen2.5-0.5B-Instruct
+ library_name: peft
+ ---
+
+ # Model Card for Model ID
+
+ <!-- Unfilled auto-generated PEFT model card; its body is identical to DS_LoRA/README.md above. -->
+
+ ### Framework versions
+
+ - PEFT 0.15.1
Qwen_CoT_LoRA/adapter_config.json ADDED
@@ -0,0 +1,35 @@
+ {
+   "alpha_pattern": {},
+   "auto_mapping": null,
+   "base_model_name_or_path": null,
+   "bias": "none",
+   "corda_config": null,
+   "eva_config": null,
+   "exclude_modules": null,
+   "fan_in_fan_out": false,
+   "inference_mode": true,
+   "init_lora_weights": true,
+   "layer_replication": null,
+   "layers_pattern": null,
+   "layers_to_transform": null,
+   "loftq_config": {},
+   "lora_alpha": 32,
+   "lora_bias": false,
+   "lora_dropout": 0.1,
+   "megatron_config": null,
+   "megatron_core": "megatron.core",
+   "modules_to_save": null,
+   "peft_type": "LORA",
+   "r": 8,
+   "rank_pattern": {},
+   "revision": null,
+   "target_modules": [
+     "k_proj",
+     "v_proj",
+     "q_proj"
+   ],
+   "task_type": "CAUSAL_LM",
+   "trainable_token_indices": null,
+   "use_dora": false,
+   "use_rslora": false
+ }
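
`base_model_name_or_path` is again `null` here; the README front matter above names `Qwen/Qwen2.5-0.5B-Instruct` as the base. A short generation sketch under that assumption; the keyword-then-`<think>` prompt shape mirrors the raw data format in `code/GRPO.ipynb` and is likewise an assumption about how this adapter is meant to be prompted:

```python
# Sketch: generate lyrics with the CoT adapter on its assumed Qwen base.
from transformers import AutoModelForCausalLM, AutoTokenizer
from peft import PeftModel

base_id = "Qwen/Qwen2.5-0.5B-Instruct"  # from the README front matter above
tokenizer = AutoTokenizer.from_pretrained(base_id)
model = PeftModel.from_pretrained(
    AutoModelForCausalLM.from_pretrained(base_id), "Qwen_CoT_LoRA"
)

# Keywords followed by <think>, matching the raw training-file layout;
# the model is expected to continue with reasoning, then lyrics.
inputs = tokenizer("午夜,寒冬,心动<think>", return_tensors="pt")
output = model.generate(**inputs, max_new_tokens=512)
print(tokenizer.decode(output[0], skip_special_tokens=True))
```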
Qwen_CoT_LoRA/adapter_model.safetensors ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:dbb7d5e3fea8f9085b091a8e9b37028f06cd23297d9d02a1e7fbf747310e4c86
+ size 2970304
Qwen_LoRA/README.md ADDED
@@ -0,0 +1,202 @@
+ ---
+ base_model: /workspace/local_model
+ library_name: peft
+ ---
+
+ # Model Card for Model ID
+
+ <!-- Unfilled auto-generated PEFT model card; its body is identical to DS_LoRA/README.md above. -->
+
+ ### Framework versions
+
+ - PEFT 0.15.0
Qwen_LoRA/adapter_config.json ADDED
@@ -0,0 +1,34 @@
+ {
+   "alpha_pattern": {},
+   "auto_mapping": null,
+   "base_model_name_or_path": "/workspace/local_model",
+   "bias": "none",
+   "corda_config": null,
+   "eva_config": null,
+   "exclude_modules": null,
+   "fan_in_fan_out": false,
+   "inference_mode": true,
+   "init_lora_weights": true,
+   "layer_replication": null,
+   "layers_pattern": null,
+   "layers_to_transform": null,
+   "loftq_config": {},
+   "lora_alpha": 32,
+   "lora_bias": false,
+   "lora_dropout": 0.1,
+   "megatron_config": null,
+   "megatron_core": "megatron.core",
+   "modules_to_save": null,
+   "peft_type": "LORA",
+   "r": 8,
+   "rank_pattern": {},
+   "revision": null,
+   "target_modules": [
+     "v_proj",
+     "q_proj"
+   ],
+   "task_type": "CAUSAL_LM",
+   "trainable_token_indices": null,
+   "use_dora": false,
+   "use_rslora": false
+ }
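
Here `base_model_name_or_path` is `/workspace/local_model`, a path from the training machine that will not resolve anywhere else, so the base model must be passed in explicitly. A sketch assuming the local checkpoint was a Qwen2.5-0.5B-Instruct snapshot (an assumption; the config does not say):

```python
# Sketch: work around the stale /workspace/local_model path by
# constructing the base model directly. The Qwen2.5 id is assumed.
from transformers import AutoModelForCausalLM
from peft import PeftModel

base = AutoModelForCausalLM.from_pretrained("Qwen/Qwen2.5-0.5B-Instruct")
model = PeftModel.from_pretrained(base, "Qwen_LoRA")  # r=8 LoRA on q_proj/v_proj
```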
Qwen_LoRA/adapter_model.safetensors ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:45708acd3a64f3f203e8eb44855bfc122a62008d16776b5c73672ec6e102eab2
+ size 2175168
code/GRPO.ipynb ADDED
@@ -0,0 +1,460 @@
+ {
+  "cells": [
+   {
+    "cell_type": "markdown",
+    "id": "e325d4b51fe4fad2",
+    "metadata": {},
+    "source": [
+     "# Load the model and the data"
+    ]
+   },
+   {
+    "cell_type": "code",
+    "execution_count": 1,
+    "id": "initial_id",
+    "metadata": {
+     "ExecuteTime": {
+      "end_time": "2025-03-31T15:57:38.940490Z",
+      "start_time": "2025-03-31T15:57:29.198500Z"
+     },
+     "collapsed": true
+    },
+    "outputs": [
+     {
+      "name": "stderr",
+      "output_type": "stream",
+      "text": [
+       "E:\\共享\\GoodMusicV2.0\\.venv\\Lib\\site-packages\\tqdm\\auto.py:21: TqdmWarning: IProgress not found. Please update jupyter and ipywidgets. See https://ipywidgets.readthedocs.io/en/stable/user_install.html\n",
+       "  from .autonotebook import tqdm as notebook_tqdm\n"
+      ]
+     }
+    ],
+    "source": [
+     "from transformers import AutoModelForCausalLM, AutoTokenizer\n",
+     "from trl import GRPOTrainer, GRPOConfig  # assumes the installed trl provides GRPOTrainer\n",
+     "from peft import PeftModel\n",
+     "from reward import compute_rewards\n",
+     "\n",
+     "def load_data(input_path):\n",
+     "    data = []\n",
+     "    with open(input_path, 'r', encoding='utf-8') as f:\n",
+     "        for line in f:\n",
+     "            line = line.strip()\n",
+     "            if not line:\n",
+     "                continue\n",
+     "            parts = line.split('<think>', 1)\n",
+     "            if len(parts) != 2:\n",
+     "                print(f\"Warning: skipping malformed line: {line}\")\n",
+     "                continue\n",
+     "            keywords_part, lyrics = parts[0], parts[1]\n",
+     "            keywords = [kw.strip() for kw in keywords_part.split(',')]\n",
+     "\n",
+     "            # Key point: the keywords become the prompt, the lyrics the completion\n",
+     "            data.append({\n",
+     "                # instruction prompt (kept in Chinese to match the training data) plus the joined keywords\n",
+     "                'prompt': \"根据以下关键词生成一首歌词,歌词中包含多个句子,确保句子通顺,诗意,格式正确.让我们一步一步的思考(思考过程包含在<think>和</think>之间):\" + \",\".join(keywords),\n",
+     "                'completion': \"<think>\" + lyrics,  # the lyrics, with the <think> marker restored, as the model output\n",
+     "                'keywords': keywords,  # raw keyword list, kept for the reward function\n",
+     "            })\n",
+     "\n",
+     "    print(f\"Loaded {len(data)} records\")\n",
+     "    return data\n"
+    ]
+   },
+   {
+    "cell_type": "code",
+    "execution_count": 2,
+    "id": "dc98a63e850836a",
+    "metadata": {
+     "ExecuteTime": {
+      "end_time": "2025-03-31T15:57:38.963191Z",
+      "start_time": "2025-03-31T15:57:38.951488Z"
+     }
+    },
+    "outputs": [
+     {
+      "name": "stdout",
+      "output_type": "stream",
+      "text": [
+       "Loaded 1000 records\n",
+       "First record: {'prompt': '根据以下关键词生成一首歌词,歌词中包含多个句子,确保句子通顺,诗意,格式正确.让我们一步一步的思考(思考过程包含在<think>和</think>之间):午夜,寒冬,心动', 'completion': '<think>嗯,用户让我根据“午夜,寒冬,心动”这三个关键词写一首诗。首先,我需要理解每个词带来的意象和情感。午夜通常给人一种寂静、神秘的感觉,可能带有孤独或反思的情绪。寒冬则联想到寒冷、冰雪,可能还有萧瑟的景象,但也可以有温暖的反差,比如炉火或者温暖的回忆。心动是情感的核心,可能涉及爱情、突然的情感波动,或者内心的温暖。接下来,我要考虑如何将这三个元素融合在一起。可能需要营造一个寒冷的冬夜环境,然后在这样的背景下引入心动的瞬间,形成对比。比如,用寒冷的景象来衬托内心的温暖或突然的情感变化。然后,结构方面,可能需要分几段,每段围绕一个关键词展开,但又要相互联系。比如,前两段描绘午夜和寒冬的景象,第三段引入心动的元素。或者交替出现,交织在一起。语言风格方面,用户可能希望有诗意和意象化的表达,避免直白。使用比喻、拟人等修辞手法,比如将心跳比作钟摆,或者用冰晶、霜花来表现寒冷。同时,注意押韵和节奏,使诗歌流畅。还需要考虑用户可能的深层需求。他们可能不仅仅想要一首描述这三个词的诗,而是希望表达在寒冷孤独的环境中突然感受到的温暖或爱情,强调对比和情感冲击。因此,诗里需要有转折点,从冷到暖,从静到动。可能会遇到的挑战是如何自然地将三个关键词串联起来,不显得生硬。可能需要找到一个共同的场景或隐喻,比如午夜寒冬中的相遇,或者内心的变化。同时,避免陈词滥调,用新颖的比喻,比如将霜花比作时针,或者心跳与钟摆的共振。最后,检查诗的整体意境是否统一,情感是否连贯,是否有足够的意象让读者产生共鸣。可能需要调整用词和结构,使每个部分都服务于主题,增强整体效果。</think>:《冰裂时刻》/石英血管里霜花正逆向生长/成为时针,成为所有表盘/反向旋转的证词。玻璃在窗框深处/爆出细纹,像候鸟迁徙时/骨骼深处迸发的裂响//壁炉吞下最后一块松木的年轮/灰烬在铁架上凝结成第六种晶体/这时钟摆突然停驻——/某种振频正穿过雪原/穿过二十年冻土下沉默的矿脉//冰层深处传来远古鲸群的应和/我们站在地磁偏转的切线上/听见彼此胸腔里/石英开始顺时针碎裂/春天正从裂缝中/涌出第一滴蓝', 'keywords': ['午夜', '寒冬', '心动']}\n"
+      ]
+     }
+    ],
+    "source": [
+     "dataset = load_data('../data/CoTdata.txt')\n",
+     "if dataset:\n",
+     "    print(\"First record:\", dataset[0])\n",
+     "else:\n",
+     "    print(\"No valid data was loaded.\")"
+    ]
+   },
+   {
+    "cell_type": "code",
+    "execution_count": 3,
+    "id": "17175326d7901595",
+    "metadata": {
+     "ExecuteTime": {
+      "end_time": "2025-03-31T15:57:45.180036Z",
+      "start_time": "2025-03-31T15:57:41.329675Z"
+     }
+    },
+    "outputs": [
+     {
+      "name": "stderr",
+      "output_type": "stream",
+      "text": [
+       "Sliding Window Attention is enabled but not implemented for `sdpa`; unexpected results may be encountered.\n"
+      ]
+     },
+     {
+      "name": "stdout",
+      "output_type": "stream",
+      "text": [
+       "trainable params: 372,736 || all params: 1,777,460,736 || trainable%: 0.0210\n"
+      ]
+     },
+     {
+      "name": "stderr",
+      "output_type": "stream",
+      "text": [
+       "E:\\共享\\GoodMusicV2.0\\.venv\\Lib\\site-packages\\peft\\mapping_func.py:73: UserWarning: You are trying to modify a model with PEFT for a second time. If you want to reload the model with a different config, make sure to call `.unload()` before.\n",
+       "  warnings.warn(\n",
+       "E:\\共享\\GoodMusicV2.0\\.venv\\Lib\\site-packages\\peft\\tuners\\tuners_utils.py:167: UserWarning: Already found a `peft_config` attribute in the model. This will lead to having multiple adapters in the model. Make sure to know what you are doing!\n",
+       "  warnings.warn(\n"
+      ]
+     }
+    ],
+    "source": [
+     "base_model = AutoModelForCausalLM.from_pretrained(\"deepseek-ai/DeepSeek-R1-Distill-Qwen-1.5B\").to(\"cuda\")\n",
+     "# Load the LoRA adapter produced by supervised fine-tuning\n",
+     "model = PeftModel.from_pretrained(base_model, \"../3_26_LoRA\").to(\"cuda\")  # path to your LoRA\n",
+     "tokenizer = AutoTokenizer.from_pretrained(\"deepseek-ai/DeepSeek-R1-Distill-Qwen-1.5B\")\n",
+     "tokenizer.pad_token = tokenizer.eos_token\n",
+     "\n",
+     "# Alternative kept from an earlier run: LoraConfig(task_type=\"CAUSAL_LM\", r=16, lora_alpha=32, target_modules=\"all-linear\")\n",
+     "# Stack a second, trainable LoRA on top for GRPO (hence the PEFT warnings above)\n",
+     "from peft import LoraConfig, get_peft_model\n",
+     "target_modules = [\"q_proj\", \"k_proj\", \"v_proj\"]\n",
+     "lora_config = LoraConfig(\n",
+     "    r=2,  # rank (8-32 is also worth trying)\n",
+     "    lora_alpha=32,  # scaling factor (often set to 2*r)\n",
+     "    target_modules=target_modules,\n",
+     "    bias=\"none\",  # do not train bias terms\n",
+     ")\n",
+     "model = get_peft_model(model, lora_config)\n",
+     "model.print_trainable_parameters()"
+    ]
+   },
+   {
+    "cell_type": "markdown",
+    "id": "d2a601c596644fc",
+    "metadata": {},
+    "source": [
+     "# Configure the training hyperparameters"
+    ]
+   },
+   {
+    "cell_type": "code",
+    "execution_count": 4,
+    "id": "8ce28487acb606a2",
+    "metadata": {
+     "ExecuteTime": {
+      "end_time": "2025-03-31T15:57:48.223131Z",
+      "start_time": "2025-03-31T15:57:48.149696Z"
+     }
+    },
+    "outputs": [],
+    "source": [
+     "# GRPO hyperparameters (tune these as needed)\n",
+     "config = GRPOConfig(\n",
+     "    gradient_accumulation_steps=50,  # accumulate gradients over this many steps per optimizer update\n",
+     "    per_device_train_batch_size=2,  # samples per device per step\n",
+     "    epsilon=0.2,  # clip range used by GRPO\n",
+     "    beta=0.05,  # KL penalty coefficient\n",
+     "    num_train_epochs=1,  # total training epochs\n",
+     "    num_generations=2,  # group size for sampled completions\n",
+     "    learning_rate=1e-5,  # optimizer learning rate\n",
+     "    bf16=True,\n",
+     "    adam_beta1=0.9,\n",
+     "    adam_beta2=0.98,\n",
+     "    optim=\"adamw_8bit\",  # optimizer\n",
+     "    max_grad_norm=0.1,  # gradient clipping threshold\n",
+     "    save_steps=1000,  # save a checkpoint every N steps\n",
+     "    save_total_limit=2,  # keep at most this many checkpoints\n",
+     "    logging_steps=5,  # log every N steps\n",
+     "    output_dir=\"GRPO\",  # where checkpoints are written\n",
+     "    weight_decay=0.01,  # weight decay\n",
+     "    warmup_ratio=0.03,  # LR warm-up ratio\n",
+     "    max_prompt_length=256,\n",
+     "    max_completion_length=1024,  # maximum generated length\n",
+     "    report_to='tensorboard',\n",
+     ")\n",
+     "\n",
+     "# Alternative settings kept for reference:\n",
+     "# training_args = GRPOConfig(\n",
+     "#     output_dir=\"GRPO\",\n",
+     "#     learning_rate=2e-5,\n",
+     "#     per_device_train_batch_size=8,\n",
+     "#     gradient_accumulation_steps=2,\n",
+     "#     max_prompt_length=512,\n",
+     "#     max_completion_length=96,\n",
+     "#     num_generations=8,\n",
+     "#     optim=\"adamw_8bit\",\n",
+     "#     num_train_epochs=1,\n",
+     "#     bf16=True,\n",
+     "#     report_to=[\"wandb\"],\n",
+     "#     remove_unused_columns=False,\n",
+     "#     logging_steps=1,\n",
+     "# )"
+    ]
+   },
+   {
+    "cell_type": "markdown",
+    "id": "793b3094cd98fed6",
+    "metadata": {},
+    "source": [
+     "# Train the model"
+    ]
+   },
+   {
+    "cell_type": "code",
+    "execution_count": 5,
+    "id": "19094188d22e45c2",
+    "metadata": {
+     "ExecuteTime": {
+      "end_time": "2025-03-31T17:08:58.041584Z",
+      "start_time": "2025-03-31T15:57:50.316805Z"
+     }
+    },
+    "outputs": [
+     {
+      "name": "stderr",
+      "output_type": "stream",
+      "text": [
+       "No label_names provided for model class `PeftModel`. Since `PeftModel` hides base models input arguments, if label_names is not given, label_names can't be set automatically within `Trainer`. Note that empty label_names list will be used instead.\n"
+      ]
+     },
+     {
+      "data": {
+       "text/html": [
+        "<progress value='2' max='20'></progress> [2/20, Epoch 0.05/1]\n",
+        "<table><thead><tr><th>Step</th><th>Training Loss</th></tr></thead><tbody></tbody></table>"
+       ],
+       "text/plain": [
+        "<IPython.core.display.HTML object>"
+       ]
+      },
+      "metadata": {},
+      "output_type": "display_data"
+     },
+     {
+      "ename": "KeyboardInterrupt",
+      "evalue": "",
+      "output_type": "error",
+      "traceback": [
+       "---------------------------------------------------------------------------",
+       "KeyboardInterrupt                         Traceback (most recent call last)",
+       "Cell In[5], line 16\n      1 # (earlier commented-out Trainer/wandb variant)\n   (...)\n     10 trainer = GRPOTrainer(\n     11     model=model,\n     12     reward_funcs=[compute_rewards],\n     13     args=config,\n     14     train_dataset=dataset\n     15 )\n---> 16 trainer.train()\n",
+       "File transformers\\trainer.py:2245, in Trainer.train\n",
+       "File transformers\\trainer.py:2556, in Trainer._inner_training_loop\n",
+       "File transformers\\trainer.py:3712, in Trainer.training_step\n",
+       "File trl\\extras\\profiling.py:87, in profiling_decorator.<locals>.wrapper\n",
+       "File trl\\trainer\\grpo_trainer.py:647, in GRPOTrainer._prepare_inputs\n",
+       "File trl\\trainer\\grpo_trainer.py:719, in GRPOTrainer._generate_and_score_completions\n",
+       "File peft\\peft_model.py:823, in PeftModel.generate\n",
+       "File peft\\peft_model.py:1874, in PeftModelForCausalLM.generate\n",
+       "File torch\\utils\\_contextlib.py:116, in context_decorator.<locals>.decorate_context\n",
+       "File transformers\\generation\\utils.py:2326, in GenerationMixin.generate\n",
+       "File transformers\\generation\\utils.py:3289, in GenerationMixin._sample\n",
+       "File torch\\nn\\modules\\module.py:1739, in Module._wrapped_call_impl\n",
+       "File torch\\nn\\modules\\module.py:1750, in Module._call_impl\n",
283
+ "\u001b[36mFile \u001b[39m\u001b[32mE:\\共享\\GoodMusicV2.0\\.venv\\Lib\\site-packages\\transformers\\utils\\deprecation.py:172\u001b[39m, in \u001b[36mdeprecate_kwarg.<locals>.wrapper.<locals>.wrapped_func\u001b[39m\u001b[34m(*args, **kwargs)\u001b[39m\n\u001b[32m 168\u001b[39m \u001b[38;5;28;01melif\u001b[39;00m minimum_action \u001b[38;5;129;01min\u001b[39;00m (Action.NOTIFY, Action.NOTIFY_ALWAYS) \u001b[38;5;129;01mand\u001b[39;00m \u001b[38;5;129;01mnot\u001b[39;00m is_torchdynamo_compiling():\n\u001b[32m 169\u001b[39m \u001b[38;5;66;03m# DeprecationWarning is ignored by default, so we use FutureWarning instead\u001b[39;00m\n\u001b[32m 170\u001b[39m warnings.warn(message, \u001b[38;5;167;01mFutureWarning\u001b[39;00m, stacklevel=\u001b[32m2\u001b[39m)\n\u001b[32m--> \u001b[39m\u001b[32m172\u001b[39m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[43mfunc\u001b[49m\u001b[43m(\u001b[49m\u001b[43m*\u001b[49m\u001b[43margs\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43m*\u001b[49m\u001b[43m*\u001b[49m\u001b[43mkwargs\u001b[49m\u001b[43m)\u001b[49m\n",
284
+ "\u001b[36mFile \u001b[39m\u001b[32mE:\\共享\\GoodMusicV2.0\\.venv\\Lib\\site-packages\\transformers\\models\\qwen2\\modeling_qwen2.py:855\u001b[39m, in \u001b[36mQwen2ForCausalLM.forward\u001b[39m\u001b[34m(self, input_ids, attention_mask, position_ids, past_key_values, inputs_embeds, labels, use_cache, output_attentions, output_hidden_states, return_dict, cache_position, logits_to_keep, **kwargs)\u001b[39m\n\u001b[32m 852\u001b[39m return_dict = return_dict \u001b[38;5;28;01mif\u001b[39;00m return_dict \u001b[38;5;129;01mis\u001b[39;00m \u001b[38;5;129;01mnot\u001b[39;00m \u001b[38;5;28;01mNone\u001b[39;00m \u001b[38;5;28;01melse\u001b[39;00m \u001b[38;5;28mself\u001b[39m.config.use_return_dict\n\u001b[32m 854\u001b[39m \u001b[38;5;66;03m# decoder outputs consists of (dec_features, layer_state, dec_hidden, dec_attn)\u001b[39;00m\n\u001b[32m--> \u001b[39m\u001b[32m855\u001b[39m outputs = \u001b[38;5;28;43mself\u001b[39;49m\u001b[43m.\u001b[49m\u001b[43mmodel\u001b[49m\u001b[43m(\u001b[49m\n\u001b[32m 856\u001b[39m \u001b[43m \u001b[49m\u001b[43minput_ids\u001b[49m\u001b[43m=\u001b[49m\u001b[43minput_ids\u001b[49m\u001b[43m,\u001b[49m\n\u001b[32m 857\u001b[39m \u001b[43m \u001b[49m\u001b[43mattention_mask\u001b[49m\u001b[43m=\u001b[49m\u001b[43mattention_mask\u001b[49m\u001b[43m,\u001b[49m\n\u001b[32m 858\u001b[39m \u001b[43m \u001b[49m\u001b[43mposition_ids\u001b[49m\u001b[43m=\u001b[49m\u001b[43mposition_ids\u001b[49m\u001b[43m,\u001b[49m\n\u001b[32m 859\u001b[39m \u001b[43m \u001b[49m\u001b[43mpast_key_values\u001b[49m\u001b[43m=\u001b[49m\u001b[43mpast_key_values\u001b[49m\u001b[43m,\u001b[49m\n\u001b[32m 860\u001b[39m \u001b[43m \u001b[49m\u001b[43minputs_embeds\u001b[49m\u001b[43m=\u001b[49m\u001b[43minputs_embeds\u001b[49m\u001b[43m,\u001b[49m\n\u001b[32m 861\u001b[39m \u001b[43m \u001b[49m\u001b[43muse_cache\u001b[49m\u001b[43m=\u001b[49m\u001b[43muse_cache\u001b[49m\u001b[43m,\u001b[49m\n\u001b[32m 862\u001b[39m \u001b[43m \u001b[49m\u001b[43moutput_attentions\u001b[49m\u001b[43m=\u001b[49m\u001b[43moutput_attentions\u001b[49m\u001b[43m,\u001b[49m\n\u001b[32m 863\u001b[39m \u001b[43m \u001b[49m\u001b[43moutput_hidden_states\u001b[49m\u001b[43m=\u001b[49m\u001b[43moutput_hidden_states\u001b[49m\u001b[43m,\u001b[49m\n\u001b[32m 864\u001b[39m \u001b[43m \u001b[49m\u001b[43mreturn_dict\u001b[49m\u001b[43m=\u001b[49m\u001b[43mreturn_dict\u001b[49m\u001b[43m,\u001b[49m\n\u001b[32m 865\u001b[39m \u001b[43m \u001b[49m\u001b[43mcache_position\u001b[49m\u001b[43m=\u001b[49m\u001b[43mcache_position\u001b[49m\u001b[43m,\u001b[49m\n\u001b[32m 866\u001b[39m \u001b[43m \u001b[49m\u001b[43m*\u001b[49m\u001b[43m*\u001b[49m\u001b[43mkwargs\u001b[49m\u001b[43m,\u001b[49m\n\u001b[32m 867\u001b[39m \u001b[43m\u001b[49m\u001b[43m)\u001b[49m\n\u001b[32m 869\u001b[39m hidden_states = outputs[\u001b[32m0\u001b[39m]\n\u001b[32m 870\u001b[39m \u001b[38;5;66;03m# Only compute necessary logits, and do not upcast them to float if we are not computing the loss\u001b[39;00m\n",
285
+ "\u001b[36mFile \u001b[39m\u001b[32mE:\\共享\\GoodMusicV2.0\\.venv\\Lib\\site-packages\\torch\\nn\\modules\\module.py:1739\u001b[39m, in \u001b[36mModule._wrapped_call_impl\u001b[39m\u001b[34m(self, *args, **kwargs)\u001b[39m\n\u001b[32m 1737\u001b[39m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[38;5;28mself\u001b[39m._compiled_call_impl(*args, **kwargs) \u001b[38;5;66;03m# type: ignore[misc]\u001b[39;00m\n\u001b[32m 1738\u001b[39m \u001b[38;5;28;01melse\u001b[39;00m:\n\u001b[32m-> \u001b[39m\u001b[32m1739\u001b[39m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[38;5;28;43mself\u001b[39;49m\u001b[43m.\u001b[49m\u001b[43m_call_impl\u001b[49m\u001b[43m(\u001b[49m\u001b[43m*\u001b[49m\u001b[43margs\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43m*\u001b[49m\u001b[43m*\u001b[49m\u001b[43mkwargs\u001b[49m\u001b[43m)\u001b[49m\n",
286
+ "\u001b[36mFile \u001b[39m\u001b[32mE:\\共享\\GoodMusicV2.0\\.venv\\Lib\\site-packages\\torch\\nn\\modules\\module.py:1750\u001b[39m, in \u001b[36mModule._call_impl\u001b[39m\u001b[34m(self, *args, **kwargs)\u001b[39m\n\u001b[32m 1745\u001b[39m \u001b[38;5;66;03m# If we don't have any hooks, we want to skip the rest of the logic in\u001b[39;00m\n\u001b[32m 1746\u001b[39m \u001b[38;5;66;03m# this function, and just call forward.\u001b[39;00m\n\u001b[32m 1747\u001b[39m \u001b[38;5;28;01mif\u001b[39;00m \u001b[38;5;129;01mnot\u001b[39;00m (\u001b[38;5;28mself\u001b[39m._backward_hooks \u001b[38;5;129;01mor\u001b[39;00m \u001b[38;5;28mself\u001b[39m._backward_pre_hooks \u001b[38;5;129;01mor\u001b[39;00m \u001b[38;5;28mself\u001b[39m._forward_hooks \u001b[38;5;129;01mor\u001b[39;00m \u001b[38;5;28mself\u001b[39m._forward_pre_hooks\n\u001b[32m 1748\u001b[39m \u001b[38;5;129;01mor\u001b[39;00m _global_backward_pre_hooks \u001b[38;5;129;01mor\u001b[39;00m _global_backward_hooks\n\u001b[32m 1749\u001b[39m \u001b[38;5;129;01mor\u001b[39;00m _global_forward_hooks \u001b[38;5;129;01mor\u001b[39;00m _global_forward_pre_hooks):\n\u001b[32m-> \u001b[39m\u001b[32m1750\u001b[39m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[43mforward_call\u001b[49m\u001b[43m(\u001b[49m\u001b[43m*\u001b[49m\u001b[43margs\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43m*\u001b[49m\u001b[43m*\u001b[49m\u001b[43mkwargs\u001b[49m\u001b[43m)\u001b[49m\n\u001b[32m 1752\u001b[39m result = \u001b[38;5;28;01mNone\u001b[39;00m\n\u001b[32m 1753\u001b[39m called_always_called_hooks = \u001b[38;5;28mset\u001b[39m()\n",
287
+ "\u001b[36mFile \u001b[39m\u001b[32mE:\\共享\\GoodMusicV2.0\\.venv\\Lib\\site-packages\\transformers\\models\\qwen2\\modeling_qwen2.py:579\u001b[39m, in \u001b[36mQwen2Model.forward\u001b[39m\u001b[34m(self, input_ids, attention_mask, position_ids, past_key_values, inputs_embeds, use_cache, output_attentions, output_hidden_states, return_dict, cache_position, **flash_attn_kwargs)\u001b[39m\n\u001b[32m 567\u001b[39m layer_outputs = \u001b[38;5;28mself\u001b[39m._gradient_checkpointing_func(\n\u001b[32m 568\u001b[39m decoder_layer.\u001b[34m__call__\u001b[39m,\n\u001b[32m 569\u001b[39m hidden_states,\n\u001b[32m (...)\u001b[39m\u001b[32m 576\u001b[39m position_embeddings,\n\u001b[32m 577\u001b[39m )\n\u001b[32m 578\u001b[39m \u001b[38;5;28;01melse\u001b[39;00m:\n\u001b[32m--> \u001b[39m\u001b[32m579\u001b[39m layer_outputs = \u001b[43mdecoder_layer\u001b[49m\u001b[43m(\u001b[49m\n\u001b[32m 580\u001b[39m \u001b[43m \u001b[49m\u001b[43mhidden_states\u001b[49m\u001b[43m,\u001b[49m\n\u001b[32m 581\u001b[39m \u001b[43m \u001b[49m\u001b[43mattention_mask\u001b[49m\u001b[43m=\u001b[49m\u001b[43mcausal_mask\u001b[49m\u001b[43m,\u001b[49m\n\u001b[32m 582\u001b[39m \u001b[43m \u001b[49m\u001b[43mposition_ids\u001b[49m\u001b[43m=\u001b[49m\u001b[43mposition_ids\u001b[49m\u001b[43m,\u001b[49m\n\u001b[32m 583\u001b[39m \u001b[43m \u001b[49m\u001b[43mpast_key_value\u001b[49m\u001b[43m=\u001b[49m\u001b[43mpast_key_values\u001b[49m\u001b[43m,\u001b[49m\n\u001b[32m 584\u001b[39m \u001b[43m \u001b[49m\u001b[43moutput_attentions\u001b[49m\u001b[43m=\u001b[49m\u001b[43moutput_attentions\u001b[49m\u001b[43m,\u001b[49m\n\u001b[32m 585\u001b[39m \u001b[43m \u001b[49m\u001b[43muse_cache\u001b[49m\u001b[43m=\u001b[49m\u001b[43muse_cache\u001b[49m\u001b[43m,\u001b[49m\n\u001b[32m 586\u001b[39m \u001b[43m \u001b[49m\u001b[43mcache_position\u001b[49m\u001b[43m=\u001b[49m\u001b[43mcache_position\u001b[49m\u001b[43m,\u001b[49m\n\u001b[32m 587\u001b[39m \u001b[43m \u001b[49m\u001b[43mposition_embeddings\u001b[49m\u001b[43m=\u001b[49m\u001b[43mposition_embeddings\u001b[49m\u001b[43m,\u001b[49m\n\u001b[32m 588\u001b[39m \u001b[43m \u001b[49m\u001b[43m*\u001b[49m\u001b[43m*\u001b[49m\u001b[43mflash_attn_kwargs\u001b[49m\u001b[43m,\u001b[49m\n\u001b[32m 589\u001b[39m \u001b[43m \u001b[49m\u001b[43m)\u001b[49m\n\u001b[32m 591\u001b[39m hidden_states = layer_outputs[\u001b[32m0\u001b[39m]\n\u001b[32m 593\u001b[39m \u001b[38;5;28;01mif\u001b[39;00m output_attentions:\n",
288
+ "\u001b[36mFile \u001b[39m\u001b[32mE:\\共享\\GoodMusicV2.0\\.venv\\Lib\\site-packages\\torch\\nn\\modules\\module.py:1739\u001b[39m, in \u001b[36mModule._wrapped_call_impl\u001b[39m\u001b[34m(self, *args, **kwargs)\u001b[39m\n\u001b[32m 1737\u001b[39m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[38;5;28mself\u001b[39m._compiled_call_impl(*args, **kwargs) \u001b[38;5;66;03m# type: ignore[misc]\u001b[39;00m\n\u001b[32m 1738\u001b[39m \u001b[38;5;28;01melse\u001b[39;00m:\n\u001b[32m-> \u001b[39m\u001b[32m1739\u001b[39m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[38;5;28;43mself\u001b[39;49m\u001b[43m.\u001b[49m\u001b[43m_call_impl\u001b[49m\u001b[43m(\u001b[49m\u001b[43m*\u001b[49m\u001b[43margs\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43m*\u001b[49m\u001b[43m*\u001b[49m\u001b[43mkwargs\u001b[49m\u001b[43m)\u001b[49m\n",
289
+ "\u001b[36mFile \u001b[39m\u001b[32mE:\\共享\\GoodMusicV2.0\\.venv\\Lib\\site-packages\\torch\\nn\\modules\\module.py:1750\u001b[39m, in \u001b[36mModule._call_impl\u001b[39m\u001b[34m(self, *args, **kwargs)\u001b[39m\n\u001b[32m 1745\u001b[39m \u001b[38;5;66;03m# If we don't have any hooks, we want to skip the rest of the logic in\u001b[39;00m\n\u001b[32m 1746\u001b[39m \u001b[38;5;66;03m# this function, and just call forward.\u001b[39;00m\n\u001b[32m 1747\u001b[39m \u001b[38;5;28;01mif\u001b[39;00m \u001b[38;5;129;01mnot\u001b[39;00m (\u001b[38;5;28mself\u001b[39m._backward_hooks \u001b[38;5;129;01mor\u001b[39;00m \u001b[38;5;28mself\u001b[39m._backward_pre_hooks \u001b[38;5;129;01mor\u001b[39;00m \u001b[38;5;28mself\u001b[39m._forward_hooks \u001b[38;5;129;01mor\u001b[39;00m \u001b[38;5;28mself\u001b[39m._forward_pre_hooks\n\u001b[32m 1748\u001b[39m \u001b[38;5;129;01mor\u001b[39;00m _global_backward_pre_hooks \u001b[38;5;129;01mor\u001b[39;00m _global_backward_hooks\n\u001b[32m 1749\u001b[39m \u001b[38;5;129;01mor\u001b[39;00m _global_forward_hooks \u001b[38;5;129;01mor\u001b[39;00m _global_forward_pre_hooks):\n\u001b[32m-> \u001b[39m\u001b[32m1750\u001b[39m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[43mforward_call\u001b[49m\u001b[43m(\u001b[49m\u001b[43m*\u001b[49m\u001b[43margs\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43m*\u001b[49m\u001b[43m*\u001b[49m\u001b[43mkwargs\u001b[49m\u001b[43m)\u001b[49m\n\u001b[32m 1752\u001b[39m result = \u001b[38;5;28;01mNone\u001b[39;00m\n\u001b[32m 1753\u001b[39m called_always_called_hooks = \u001b[38;5;28mset\u001b[39m()\n",
290
+ "\u001b[36mFile \u001b[39m\u001b[32mE:\\共享\\GoodMusicV2.0\\.venv\\Lib\\site-packages\\transformers\\models\\qwen2\\modeling_qwen2.py:276\u001b[39m, in \u001b[36mQwen2DecoderLayer.forward\u001b[39m\u001b[34m(self, hidden_states, attention_mask, position_ids, past_key_value, output_attentions, use_cache, cache_position, position_embeddings, **kwargs)\u001b[39m\n\u001b[32m 274\u001b[39m residual = hidden_states\n\u001b[32m 275\u001b[39m hidden_states = \u001b[38;5;28mself\u001b[39m.post_attention_layernorm(hidden_states)\n\u001b[32m--> \u001b[39m\u001b[32m276\u001b[39m hidden_states = \u001b[38;5;28;43mself\u001b[39;49m\u001b[43m.\u001b[49m\u001b[43mmlp\u001b[49m\u001b[43m(\u001b[49m\u001b[43mhidden_states\u001b[49m\u001b[43m)\u001b[49m\n\u001b[32m 277\u001b[39m hidden_states = residual + hidden_states\n\u001b[32m 279\u001b[39m outputs = (hidden_states,)\n",
291
+ "\u001b[36mFile \u001b[39m\u001b[32mE:\\共享\\GoodMusicV2.0\\.venv\\Lib\\site-packages\\torch\\nn\\modules\\module.py:1739\u001b[39m, in \u001b[36mModule._wrapped_call_impl\u001b[39m\u001b[34m(self, *args, **kwargs)\u001b[39m\n\u001b[32m 1737\u001b[39m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[38;5;28mself\u001b[39m._compiled_call_impl(*args, **kwargs) \u001b[38;5;66;03m# type: ignore[misc]\u001b[39;00m\n\u001b[32m 1738\u001b[39m \u001b[38;5;28;01melse\u001b[39;00m:\n\u001b[32m-> \u001b[39m\u001b[32m1739\u001b[39m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[38;5;28;43mself\u001b[39;49m\u001b[43m.\u001b[49m\u001b[43m_call_impl\u001b[49m\u001b[43m(\u001b[49m\u001b[43m*\u001b[49m\u001b[43margs\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43m*\u001b[49m\u001b[43m*\u001b[49m\u001b[43mkwargs\u001b[49m\u001b[43m)\u001b[49m\n",
292
+ "\u001b[36mFile \u001b[39m\u001b[32mE:\\共享\\GoodMusicV2.0\\.venv\\Lib\\site-packages\\torch\\nn\\modules\\module.py:1750\u001b[39m, in \u001b[36mModule._call_impl\u001b[39m\u001b[34m(self, *args, **kwargs)\u001b[39m\n\u001b[32m 1745\u001b[39m \u001b[38;5;66;03m# If we don't have any hooks, we want to skip the rest of the logic in\u001b[39;00m\n\u001b[32m 1746\u001b[39m \u001b[38;5;66;03m# this function, and just call forward.\u001b[39;00m\n\u001b[32m 1747\u001b[39m \u001b[38;5;28;01mif\u001b[39;00m \u001b[38;5;129;01mnot\u001b[39;00m (\u001b[38;5;28mself\u001b[39m._backward_hooks \u001b[38;5;129;01mor\u001b[39;00m \u001b[38;5;28mself\u001b[39m._backward_pre_hooks \u001b[38;5;129;01mor\u001b[39;00m \u001b[38;5;28mself\u001b[39m._forward_hooks \u001b[38;5;129;01mor\u001b[39;00m \u001b[38;5;28mself\u001b[39m._forward_pre_hooks\n\u001b[32m 1748\u001b[39m \u001b[38;5;129;01mor\u001b[39;00m _global_backward_pre_hooks \u001b[38;5;129;01mor\u001b[39;00m _global_backward_hooks\n\u001b[32m 1749\u001b[39m \u001b[38;5;129;01mor\u001b[39;00m _global_forward_hooks \u001b[38;5;129;01mor\u001b[39;00m _global_forward_pre_hooks):\n\u001b[32m-> \u001b[39m\u001b[32m1750\u001b[39m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[43mforward_call\u001b[49m\u001b[43m(\u001b[49m\u001b[43m*\u001b[49m\u001b[43margs\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43m*\u001b[49m\u001b[43m*\u001b[49m\u001b[43mkwargs\u001b[49m\u001b[43m)\u001b[49m\n\u001b[32m 1752\u001b[39m result = \u001b[38;5;28;01mNone\u001b[39;00m\n\u001b[32m 1753\u001b[39m called_always_called_hooks = \u001b[38;5;28mset\u001b[39m()\n",
293
+ "\u001b[36mFile \u001b[39m\u001b[32mE:\\共享\\GoodMusicV2.0\\.venv\\Lib\\site-packages\\transformers\\models\\qwen2\\modeling_qwen2.py:57\u001b[39m, in \u001b[36mQwen2MLP.forward\u001b[39m\u001b[34m(self, x)\u001b[39m\n\u001b[32m 56\u001b[39m \u001b[38;5;28;01mdef\u001b[39;00m\u001b[38;5;250m \u001b[39m\u001b[34mforward\u001b[39m(\u001b[38;5;28mself\u001b[39m, x):\n\u001b[32m---> \u001b[39m\u001b[32m57\u001b[39m down_proj = \u001b[38;5;28mself\u001b[39m.down_proj(\u001b[38;5;28mself\u001b[39m.act_fn(\u001b[38;5;28mself\u001b[39m.gate_proj(x)) * \u001b[38;5;28;43mself\u001b[39;49m\u001b[43m.\u001b[49m\u001b[43mup_proj\u001b[49m\u001b[43m(\u001b[49m\u001b[43mx\u001b[49m\u001b[43m)\u001b[49m)\n\u001b[32m 58\u001b[39m \u001b[38;5;28;01mreturn\u001b[39;00m down_proj\n",
294
+ "\u001b[36mFile \u001b[39m\u001b[32mE:\\共享\\GoodMusicV2.0\\.venv\\Lib\\site-packages\\torch\\nn\\modules\\module.py:1739\u001b[39m, in \u001b[36mModule._wrapped_call_impl\u001b[39m\u001b[34m(self, *args, **kwargs)\u001b[39m\n\u001b[32m 1737\u001b[39m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[38;5;28mself\u001b[39m._compiled_call_impl(*args, **kwargs) \u001b[38;5;66;03m# type: ignore[misc]\u001b[39;00m\n\u001b[32m 1738\u001b[39m \u001b[38;5;28;01melse\u001b[39;00m:\n\u001b[32m-> \u001b[39m\u001b[32m1739\u001b[39m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[38;5;28;43mself\u001b[39;49m\u001b[43m.\u001b[49m\u001b[43m_call_impl\u001b[49m\u001b[43m(\u001b[49m\u001b[43m*\u001b[49m\u001b[43margs\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43m*\u001b[49m\u001b[43m*\u001b[49m\u001b[43mkwargs\u001b[49m\u001b[43m)\u001b[49m\n",
295
+ "\u001b[36mFile \u001b[39m\u001b[32mE:\\共享\\GoodMusicV2.0\\.venv\\Lib\\site-packages\\torch\\nn\\modules\\module.py:1750\u001b[39m, in \u001b[36mModule._call_impl\u001b[39m\u001b[34m(self, *args, **kwargs)\u001b[39m\n\u001b[32m 1745\u001b[39m \u001b[38;5;66;03m# If we don't have any hooks, we want to skip the rest of the logic in\u001b[39;00m\n\u001b[32m 1746\u001b[39m \u001b[38;5;66;03m# this function, and just call forward.\u001b[39;00m\n\u001b[32m 1747\u001b[39m \u001b[38;5;28;01mif\u001b[39;00m \u001b[38;5;129;01mnot\u001b[39;00m (\u001b[38;5;28mself\u001b[39m._backward_hooks \u001b[38;5;129;01mor\u001b[39;00m \u001b[38;5;28mself\u001b[39m._backward_pre_hooks \u001b[38;5;129;01mor\u001b[39;00m \u001b[38;5;28mself\u001b[39m._forward_hooks \u001b[38;5;129;01mor\u001b[39;00m \u001b[38;5;28mself\u001b[39m._forward_pre_hooks\n\u001b[32m 1748\u001b[39m \u001b[38;5;129;01mor\u001b[39;00m _global_backward_pre_hooks \u001b[38;5;129;01mor\u001b[39;00m _global_backward_hooks\n\u001b[32m 1749\u001b[39m \u001b[38;5;129;01mor\u001b[39;00m _global_forward_hooks \u001b[38;5;129;01mor\u001b[39;00m _global_forward_pre_hooks):\n\u001b[32m-> \u001b[39m\u001b[32m1750\u001b[39m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[43mforward_call\u001b[49m\u001b[43m(\u001b[49m\u001b[43m*\u001b[49m\u001b[43margs\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43m*\u001b[49m\u001b[43m*\u001b[49m\u001b[43mkwargs\u001b[49m\u001b[43m)\u001b[49m\n\u001b[32m 1752\u001b[39m result = \u001b[38;5;28;01mNone\u001b[39;00m\n\u001b[32m 1753\u001b[39m called_always_called_hooks = \u001b[38;5;28mset\u001b[39m()\n",
296
+ "\u001b[36mFile \u001b[39m\u001b[32mE:\\共享\\GoodMusicV2.0\\.venv\\Lib\\site-packages\\torch\\nn\\modules\\linear.py:125\u001b[39m, in \u001b[36mLinear.forward\u001b[39m\u001b[34m(self, input)\u001b[39m\n\u001b[32m 124\u001b[39m \u001b[38;5;28;01mdef\u001b[39;00m\u001b[38;5;250m \u001b[39m\u001b[34mforward\u001b[39m(\u001b[38;5;28mself\u001b[39m, \u001b[38;5;28minput\u001b[39m: Tensor) -> Tensor:\n\u001b[32m--> \u001b[39m\u001b[32m125\u001b[39m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[43mF\u001b[49m\u001b[43m.\u001b[49m\u001b[43mlinear\u001b[49m\u001b[43m(\u001b[49m\u001b[38;5;28;43minput\u001b[39;49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;28;43mself\u001b[39;49m\u001b[43m.\u001b[49m\u001b[43mweight\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;28;43mself\u001b[39;49m\u001b[43m.\u001b[49m\u001b[43mbias\u001b[49m\u001b[43m)\u001b[49m\n",
297
+ "\u001b[31mKeyboardInterrupt\u001b[39m: "
298
+ ]
299
+ }
300
+ ],
+ "source": [
+ "# Trainer (earlier single-reward setup, kept for reference):\n",
+ "# trainer = GRPOTrainer(\n",
+ "#     model=model,\n",
+ "#     reward_funcs=[reward_len],\n",
+ "#     args=training_args,\n",
+ "#     train_dataset=dataset[\"train\"],\n",
+ "# )\n",
+ "# Train model\n",
+ "# wandb.init(project=\"GRPO\")\n",
+ "# trainer.train()\n",
+ "trainer = GRPOTrainer(\n",
+ "    model=model,\n",
+ "    reward_funcs=[compute_rewards],\n",
+ "    args=config,\n",
+ "    train_dataset=dataset\n",
+ ")\n",
+ "trainer.train()"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "id": "f621c33533e55b00",
+ "metadata": {},
+ "source": [
+ "# Evaluation"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "id": "d17d0e3eb9069545",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "import matplotlib.pyplot as plt\n",
+ "from datetime import datetime\n",
+ "\n",
+ "def plot_training_metrics(losses, kls, avg_rewards, output_dir=\".\"):\n",
+ "    \"\"\"\n",
+ "    Plot and save the training metric curves.\n",
+ "    \n",
+ "    Args:\n",
+ "        losses: list of training losses\n",
+ "        kls: list of KL divergences\n",
+ "        avg_rewards: list of average rewards\n",
+ "        output_dir: output directory\n",
+ "    \"\"\"\n",
+ "    # Build a unique, timestamped file name\n",
+ "    timestamp = datetime.now().strftime(\"%Y%m%d_%H%M%S\")\n",
+ "    output_path = f\"{output_dir}/training_curves_{timestamp}.png\"\n",
+ "    \n",
+ "    # Create the canvas\n",
+ "    plt.figure(figsize=(15, 5), dpi=300)\n",
+ "    \n",
+ "    # 1. Loss curve\n",
+ "    plt.subplot(1, 3, 1)\n",
+ "    plt.plot(losses, label=\"Loss\", linewidth=1.5, color='blue')\n",
+ "    plt.title(\"Training Loss\", fontsize=10)\n",
+ "    plt.xlabel(\"Step\", fontsize=9)\n",
+ "    plt.ylabel(\"Loss\", fontsize=9)\n",
+ "    plt.grid(True, alpha=0.3)\n",
+ "    \n",
+ "    # 2. KL divergence curve\n",
+ "    plt.subplot(1, 3, 2)\n",
+ "    plt.plot(kls, label=\"KL Divergence\", linewidth=1.5, color='orange')\n",
+ "    plt.title(\"KL Divergence\", fontsize=10)\n",
+ "    plt.xlabel(\"Step\", fontsize=9)\n",
+ "    plt.ylabel(\"KL Divergence\", fontsize=9)\n",
+ "    plt.grid(True, alpha=0.3)\n",
+ "    \n",
+ "    # 3. Average reward curve\n",
+ "    plt.subplot(1, 3, 3)\n",
+ "    plt.plot(avg_rewards, label=\"Avg Reward\", linewidth=1.5, color='green')\n",
+ "    plt.title(\"Average Reward\", fontsize=10)\n",
+ "    plt.xlabel(\"Step\", fontsize=9)\n",
+ "    plt.ylabel(\"Reward\", fontsize=9)\n",
+ "    plt.grid(True, alpha=0.3)\n",
+ "    \n",
+ "    # Tighten the layout and save\n",
+ "    plt.tight_layout()\n",
+ "    plt.savefig(\n",
+ "        output_path,\n",
+ "        bbox_inches='tight',\n",
+ "        facecolor='white',\n",
+ "        dpi=300\n",
+ "    )\n",
+ "    plt.close()\n",
+ "    \n",
+ "    print(f\"Training curves saved to: {output_path}\")\n",
+ "\n",
+ "# Usage (once the lists below have been collected during training):\n",
+ "# losses = [...]        # training losses\n",
+ "# kls = [...]           # KL divergences\n",
+ "# avg_rewards = [...]   # average rewards\n",
+ "# plot_training_metrics(losses, kls, avg_rewards)\n",
+ "\n"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "id": "b6a739a2f9d0a343",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "class MetricsCallback(TrainerCallback):\n",
+ "    def __init__(self):\n",
+ "        super().__init__()\n",
+ "        self.metrics = {\n",
+ "            'loss': [],\n",
+ "            'kl_divergence': [],\n",
+ "            'avg_reward': []\n",
+ "        }\n",
+ "    \n",
+ "    def on_log(self, args, state, control, logs=None, **kwargs):\n",
+ "        if logs is not None:\n",
+ "            if 'loss' in logs:\n",
+ "                self.metrics['loss'].append(logs['loss'])\n",
+ "            if 'kl_divergence' in logs:\n",
+ "                self.metrics['kl_divergence'].append(logs['kl_divergence'])\n",
+ "            if 'rewards' in logs:  # assumed to be a list; store its mean\n",
+ "                avg_reward = sum(logs['rewards'])/len(logs['rewards'])\n",
+ "                self.metrics['avg_reward'].append(avg_reward)\n",
+ "    \n",
+ "    "
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "id": "14cee34aa3bb165",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "plot_training_metrics(metrics_callback.metrics['loss'], metrics_callback.metrics['kl_divergence'], metrics_callback.metrics['avg_reward'])"
+ ]
+ }
+ ],
+ "metadata": {
+ "kernelspec": {
+ "display_name": ".venv",
+ "language": "python",
+ "name": "python3"
+ },
+ "language_info": {
+ "codemirror_mode": {
+ "name": "ipython",
+ "version": 2
+ },
+ "file_extension": ".py",
+ "mimetype": "text/x-python",
+ "name": "python",
+ "nbconvert_exporter": "python",
+ "pygments_lexer": "ipython2",
+ "version": "3.11.3"
+ }
+ },
+ "nbformat": 4,
+ "nbformat_minor": 5
+ }
code/LORA.py ADDED
@@ -0,0 +1,89 @@
+ from datasets import Dataset
+ from transformers import AutoTokenizer, AutoModelForCausalLM, Trainer, TrainingArguments
+ from peft import LoraConfig, get_peft_model, PeftModel
+
+ raw_data_path = ""  # replace with the path to your dataset
+ with open(raw_data_path, "r", encoding="utf-8") as f:
+     raw_lines = f.readlines()
+
+ def process_line(line):
+     segments = line.strip().split("/")
+     return "/".join(segments[:-1]) if len(segments) > 1 else line.strip()
+
+ processed_samples = [process_line(line) for line in raw_lines if line.strip()]
+ dataset = Dataset.from_dict({"text": processed_samples})
+
+ model_name = ""  # replace with the path to your model
+ tokenizer = AutoTokenizer.from_pretrained(model_name)
+ model = AutoModelForCausalLM.from_pretrained(model_name)
+
+ lora_config = LoraConfig(
+     r=8,                                  # rank of the low-rank matrices; 8, 16 or 32 are common
+     lora_alpha=32,                        # scaling factor that controls the adapter's influence
+     target_modules=["q_proj", "v_proj"],  # modules to adapt, usually the attention projections
+     lora_dropout=0.1,                     # dropout on the adapter to curb overfitting
+     bias="none",                          # bias handling; usually left untrained
+     task_type="CAUSAL_LM"                 # task type for causal language models
+ )
+ model = get_peft_model(model, lora_config)
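+ # get_peft_model freezes the base weights and injects trainable rank-r adapters into the
+ # listed projections (effective update W + (lora_alpha/r) * B @ A). A quick sanity check:
+ # model.print_trainable_parameters()  # PEFT helper; typically well under 1% of params train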
+
+ def tokenize_function(examples):
+     # Fixed instruction prefix (in Chinese: "generate a lyric from the following keywords,
+     # separate sentences with '/', think step by step inside <think>...</think>")
+     prompt = "根据以下关键词生成一首歌词,歌词中包含多个句子,句子与句子之间使用/隔开,让我们一步一步的思考(思考过程包含在<think>和</think>之间):"
+
+     # Prepend the instruction to every raw text
+     modified_texts = [prompt + text for text in examples["text"]]
+
+     # Tokenize
+     tokenized = tokenizer(modified_texts, truncation=True, padding="max_length", max_length=256)
+
+     # Copy input_ids as labels (standard causal-LM fine-tuning)
+     tokenized["labels"] = tokenized["input_ids"].copy()
+
+     return tokenized
+
+ tokenized_dataset = dataset.map(tokenize_function, batched=True)
+
+ training_args = TrainingArguments(
+     output_dir="./lora",
+     num_train_epochs=8,
+     per_device_train_batch_size=10,
+     learning_rate=2e-5,
+     weight_decay=0.01,
+     logging_steps=10000,
+     save_steps=15000,
+     fp16=True,
+ )
+
+ trainer = Trainer(
+     model=model,
+     args=training_args,
+     train_dataset=tokenized_dataset,
+     tokenizer=tokenizer,
+ )
+
+ trainer.train()
+
+ # Inference example
+ generation_config = {
+     "max_new_tokens": 1024,
+     "temperature": 1.0,
+     "top_p": 0.9,
+     "top_k": 40,
+     "repetition_penalty": 1.2,
+     "do_sample": True,
+     "encoder_no_repeat_ngram_size": 4,
+ }
+ if True:  # toggle for the quick generation demo
+     prompt = "根据以下关键词生成一首歌词,歌词中包含多个句子,句子与句子之间使用/隔开,让我们一步一步的思考(思考过程包含在<think>和</think>之间):温柔,轮廓,洒脱:"
+     input_ids = tokenizer.encode(prompt, return_tensors="pt").to(model.device)
+     outputs = model.generate(input_ids, **generation_config)
+     decoded = tokenizer.decode(outputs[0], skip_special_tokens=False)
+
+     print(decoded)
+
+ model.save_pretrained("")  # replace with the path where the adapter should be saved
code/LORA_with_CoT.py ADDED
@@ -0,0 +1,119 @@
+ import re
+ from datasets import Dataset
+ from transformers import AutoTokenizer, AutoModelForCausalLM, Trainer, TrainingArguments
+ from peft import LoraConfig, get_peft_model, PeftModel
+
+ model_name = "Qwen/Qwen2.5-0.5B-Instruct"
+
+ base_model = AutoModelForCausalLM.from_pretrained(
+     model_name,
+     torch_dtype="auto",
+     device_map="auto"
+ )
+ tokenizer = AutoTokenizer.from_pretrained(model_name)
+
+ # Load the data
+ raw_data_path = r"data/CoTdata.txt"
+ with open(raw_data_path, "r", encoding="utf-8") as f:
+     raw_lines = f.readlines()
+
+ # Parse each line into keywords, chain of thought, and the lyric itself
+ def process_line(line):
+     # the character class [::] matches both the ASCII and the full-width colon
+     pattern = r"^(.*?)<think>(.*?)</think>[::](.*)$"
+     match = re.match(pattern, line.strip())
+     if match:
+         keywords = match.group(1).strip()
+         cot = match.group(2).strip()
+         poem = match.group(3).strip()
+         # Build the training instance: the input gives the instruction and keywords,
+         # the output contains the full chain of thought followed by the answer
+         training_text = (
+             f"【输入】:根据以下关键词生成一首歌词,歌词中包含多个句子,确保句子通顺、诗意、格式正确。"
+             f"让我们一步一步的思考(思考过程包含在<think>和</think>之间):{keywords}\n\n"
+             f"【输出】:<think>{cot}</think>\n{poem}"
+         )
+         return training_text
+     else:
+         # Malformed line: report it and return None
+         print("Skipping malformed line:", line.strip())
+         return None
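+ # Each line of data/CoTdata.txt is expected to look like
+ #   风,雾,寂寞<think>...reasoning...</think>:第一句/第二句/第三句
+ # which is exactly what code/getCOT.py writes: group(1) is the keywords, group(2) the
+ # chain of thought, group(3) the '/'-separated lyric.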
+
+ # Parse every data line
+ processed_samples = []
+ for line in raw_lines:
+     result = process_line(line)
+     if result:
+         processed_samples.append(result)
+
+ # Build the Hugging Face dataset
+ dataset = Dataset.from_dict({"text": processed_samples})
+
+ # Load the base model plus the existing LoRA adapter
+ model = PeftModel.from_pretrained(base_model, r"D:\GoodMusicV3.0\3_24_LoRA").to("cuda")  # replace with your LoRA path
+ tokenizer.pad_token = tokenizer.eos_token
+
+ lora_config = LoraConfig(
+     r=8,                                            # rank of the low-rank matrices; 8, 16 or 32 are common
+     lora_alpha=32,                                  # scaling factor that controls the adapter's influence
+     target_modules=["q_proj", "k_proj", "v_proj"],  # modules to adapt, usually the attention projections
+     lora_dropout=0.1,                               # dropout on the adapter to curb overfitting
+     bias="none",                                    # usually left untrained
+     task_type="CAUSAL_LM"
+ )
+ model = get_peft_model(model, lora_config)
+ model.cuda()
+
+ # Tokenization: tokenize the text and build the labels
+ def tokenize_function(examples):
+     # the text here already contains both the input and the output
+     tokenized = tokenizer(examples["text"], truncation=True, padding="max_length", max_length=256)
+     tokenized["labels"] = tokenized["input_ids"].copy()
+     return tokenized
+
+ # Map the tokenizer over the dataset
+ tokenized_dataset = dataset.map(tokenize_function, batched=True)
+
+ # Training arguments
+ training_args = TrainingArguments(
+     output_dir="./lora",
+     num_train_epochs=1000,
+     per_device_train_batch_size=16,
+     learning_rate=2e-5,
+     weight_decay=0.01,
+     logging_steps=10000,
+     save_steps=15000,
+     fp16=True,
+ )
+
+ # Build the Trainer
+ trainer = Trainer(
+     model=model,
+     args=training_args,
+     train_dataset=tokenized_dataset,
+     tokenizer=tokenizer,
+ )
+
+ # Start training
+ trainer.train()
+
+ # Inference example
+ generation_config = {
+     "max_new_tokens": 1024,
+     "temperature": 1.0,
+     "top_p": 0.9,
+     "top_k": 40,
+     "repetition_penalty": 1.2,
+     "do_sample": True,
+     "encoder_no_repeat_ngram_size": 4,
+ }
+ if True:  # toggle for the quick generation demo
+     prompt = "根据以下关键词生成一首歌词,歌词中包含多个句子,句子与句子之间使用/隔开,让我们一步一步的思考(思考过程包含在<think>和</think>之间):温柔,轮廓,洒脱:"
+     input_ids = tokenizer.encode(prompt, return_tensors="pt").to(model.device)
+     outputs = model.generate(input_ids, **generation_config)
+     decoded = tokenizer.decode(outputs[0], skip_special_tokens=False)
+
+     print(decoded)
+
+ # Save the adapter
+ model.save_pretrained("4_2_LoRA_3")
code/UI.py ADDED
@@ -0,0 +1,181 @@
+ import sys
+ from PyQt5.QtWidgets import QApplication, QWidget, QVBoxLayout, QPushButton, QTextEdit, QLineEdit, QListWidget, QLabel, QHBoxLayout, QListWidgetItem
+ import _MyModel
+ from PyQt5.QtGui import QColor, QPalette
+ from PyQt5.QtCore import Qt
+
+ class ChatSession:
+     """Holds the contents of a single conversation"""
+     def __init__(self, topic="新对话"):
+         self.topic = topic
+         self.messages = []  # chat history
+
+     def add_message(self, sender, text):
+         """Append a message (sender: 'user' or 'ai')"""
+         self.messages.append((sender, text))
+
+
+ class ChatGPTUI(QWidget):
+
+     def __init__(self, MyModel):
+         super().__init__()
+         self.model = MyModel
+         self.first_list_item = QListWidget()
+         self.setWindowTitle("ChatGPT 聊天界面")
+         self.setGeometry(200, 200, 800, 600)
+         self.setStyleSheet("background-color: #DCB272; color: white;")  # warm background
+         # self.setWindowFlags(Qt.FramelessWindowHint)  # frameless window
+         # Main layout
+         main_layout = QHBoxLayout(self)
+         # Left side:
+         left_layout = QVBoxLayout()
+         # "New chat" button
+         self.new_chat_button = QPushButton("新建对话")
+         self.new_chat_button.setStyleSheet("background-color: #0FA958; color: white; padding: 8px; border-radius: 5px;")
+         self.new_chat_button.clicked.connect(self.create_new_chat)
+         left_layout.addWidget(self.new_chat_button)
+         # Left side: conversation history list
+         self.history_list = QListWidget()
+         self.history_list.setStyleSheet("background-color: #E4DECE; color: black; border: none;")
+         self.history_list.itemClicked.connect(self.load_selected_chat)  # selecting a past conversation
+         left_layout.addWidget(self.history_list)
+
+         # Right side: chat area
+         right_layout = QVBoxLayout()
+
+         # Topic input box
+         self.topic_input = QLineEdit()
+         self.topic_input.setPlaceholderText("请输入对话主题...")
+         self.topic_input.setStyleSheet("background-color: #E4DECE; color: black; padding: 5px; border-radius: 5px;")
+
+         # Chat display area
+         self.chat_display = QTextEdit()
+         self.chat_display.setReadOnly(True)
+         self.chat_display.setStyleSheet("background-color: #E4DECE; color: black; border: none; padding: 10px;")
+         right_layout.addWidget(self.chat_display, 7)
+
+         # Input area (horizontal layout)
+         input_layout = QHBoxLayout()
+
+         # User input box
+         self.input_field = QLineEdit()
+         self.input_field.setPlaceholderText("输入消息...")
+         self.input_field.setStyleSheet("background-color: #E4DECE; color: black; padding: 5px; border-radius: 5px;")
+         input_layout.addWidget(self.input_field, 8)
+         self.input_field.returnPressed.connect(self.send_message)
+
+         # Send button
+         self.send_button = QPushButton("发送")
+         self.send_button.setStyleSheet("background-color: #DA8D6D; color: white; padding: 8px; border-radius: 5px;")
+         self.send_button.clicked.connect(self.send_message)
+         input_layout.addWidget(self.send_button, 2)
+
+         right_layout.addLayout(input_layout)
+
+         # Assemble the main layout
+         main_layout.addLayout(left_layout, 2)
+         main_layout.addLayout(right_layout, 8)
+
+         self.setLayout(main_layout)
+
+         # Initial conversation storage
+         self.chat_sessions = []  # all sessions
+         self.current_session = None
+         self.create_new_chat()  # create a default conversation on startup
+
+     def create_new_chat(self):
+         """Create a new conversation and add it to the history list"""
+         topic = self.topic_input.text().strip()
+         if not topic:
+             topic = "新对话"
+
+         new_session = ChatSession(topic)
+         self.chat_sessions.append(new_session)
+         self.current_session = new_session
+
+         # Refresh the history list on the left
+         self.add_chat_item(topic)
+         self.history_list.setCurrentRow(self.history_list.count() - 1)  # select the new conversation
+         self.chat_display.clear()
+
+     def load_selected_chat(self):
+         """Switch to the conversation the user selected"""
+         selected_index = self.history_list.currentRow()
+         if selected_index >= 0:
+             self.current_session = self.chat_sessions[selected_index]
+             self.display_chat_history()
+
+     def display_chat_history(self):
+         """Render the current session's chat history"""
+         self.chat_display.clear()
+         for sender, text in self.current_session.messages:
+             if sender == 'user':
+                 self.chat_display.append(f"<b><span style='color: #9b7438; font-family: 微软雅黑; font-size: 28px'>主题 : </span><span style='color: #1B2131; font-family: 微软雅黑; font-size: 28px'> {text}</span></b>")
+             else:
+                 self.chat_display.append(f"<b>{'用户' if sender == 'user' else 'ChatGPT'}:</b> {text}")
+
+     def send_message(self):
+         """Send the user's input"""
+         user_text = self.input_field.text().strip()
+         if user_text and self.current_session:
+             self.current_session.add_message("user", user_text)
+             self.chat_display.append(f"<b><span style='color: #9b7438; font-family: 微软雅黑; font-size: 28px'>主题 : </span><span style='color: #1B2131; font-family: 微软雅黑; font-size: 28px'> {user_text}</span></b>")
+             self.input_field.clear()
+
+             # Ask the model for a reply
+             ai_reply = self.get_ai_response(user_text)
+             self.receive_message(ai_reply)
+
+     def receive_message(self, text):
+         """Display the AI's reply"""
+         if self.current_session:
+             self.current_session.add_message("ai", text)
+             self.chat_display.append(f"<b>ChatGPT:</b> {text}")
+
+     def get_ai_response(self, user_input):
+         """Hook for an AI backend, e.g. the OpenAI API or a local model"""
+         output = self.model.predict(user_input)
+         return f"<span style='font-size: 20px;'>{output}</span>"
+
+     def add_chat_item(self, text):
+         """Add a history entry with a delete button"""
+         item_widget = QWidget()
+         item_layout = QHBoxLayout(item_widget)
+         item_layout.setContentsMargins(5, 2, 5, 2)
+
+         label = QLabel(text)
+         delete_button = QPushButton("×")
+         delete_button.setFixedSize(20, 20)
+         delete_button.setStyleSheet("background-color: #cc6666; color: white; border-radius: 10px;")
+
+         item_layout.addWidget(label)
+         item_layout.addWidget(delete_button)
+         item_layout.addStretch()
+
+         list_item = QListWidgetItem(self.history_list)
+         list_item.setSizeHint(item_widget.sizeHint())
+
+         self.history_list.addItem(list_item)
+         self.history_list.setItemWidget(list_item, item_widget)
+
+         # Wire up the delete action
+         delete_button.clicked.connect(lambda: self.remove_chat_item(list_item))
+         self.first_list_item = list_item
+
+     def remove_chat_item(self, item):
+         """Delete a history entry"""
+         row = self.history_list.row(item)
+         del self.chat_sessions[row]
+         self.history_list.takeItem(row)
+
+
+ # Run the PyQt5 application
+ if __name__ == "__main__":
+
+     app = QApplication(sys.argv)
+     window = ChatGPTUI(_MyModel.MyModel())  # ChatGPTUI requires a model instance
+     window.show()
+     window.remove_chat_item(window.first_list_item)
+     window.create_new_chat()
+     sys.exit(app.exec_())
code/_MyModel.py ADDED
@@ -0,0 +1,24 @@
+
+ from peft import LoraConfig, get_peft_model, PeftModel
+ from transformers import AutoTokenizer, AutoModelForCausalLM
+
+ class MyModel():
+     def __init__(self):
+         model_name = "deepseek-ai/DeepSeek-R1-Distill-Qwen-1.5B"
+         lora_path = "DS_RL_model"
+         self.tokenizer = AutoTokenizer.from_pretrained(model_name)
+         model = AutoModelForCausalLM.from_pretrained(model_name)
+         self.model = PeftModel.from_pretrained(model, lora_path)
+         self.generation_config = {
+             "max_new_tokens": 2048,
+             "temperature": 0.9,
+             "top_p": 1.0,
+             "repetition_penalty": 1.2,
+         }
+
+     def predict(self, text):
+         # Chinese instruction prefix: "generate a lyric from the following keywords,
+         # separate sentences with '/', think step by step inside <think>...</think>"
+         prompt = "根据以下关键词生成一首歌词,歌词中包含多个句子,句子与句子之间使用/隔开,让我们一步一步的思考(思考过程包含在<think>和</think>之间):" + text
+         input_ids = self.tokenizer.encode(prompt, return_tensors="pt").to(self.model.device)
+         outputs = self.model.generate(input_ids, **self.generation_config)
+         decoded = self.tokenizer.decode(outputs[0], skip_special_tokens=False)
+         return decoded
+ # example keywords: 诗,样子,天地
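+ # Minimal usage sketch (assumes the "DS_RL_model" adapter directory sits next to this file):
+ #   m = MyModel()
+ #   print(m.predict("诗,样子,天地"))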
code/__main__.py ADDED
@@ -0,0 +1,15 @@
+ import sys
+ import _MyModel
+ from UI import QApplication, ChatGPTUI
+ import os
+ # Point Qt at its platform plugins; PyQt5 inside a venv sometimes fails to locate them on Windows
+ os.environ["QT_QPA_PLATFORM_PLUGIN_PATH"] = r"D:\不会编程\Machine_Learning\class_project\project\.venv\Lib\site-packages\PyQt5\Qt5\plugins"
+
+
+ if __name__ == '__main__':
+     myModel = _MyModel.MyModel()
+     app = QApplication(sys.argv)
+     window = ChatGPTUI(myModel)
+     window.show()
+     window.remove_chat_item(window.first_list_item)
+     window.create_new_chat()
+     sys.exit(app.exec_())
code/__pycache__/UI.cpython-311.pyc ADDED
Binary file (12 kB).
code/__pycache__/_MyModel.cpython-311.pyc ADDED
Binary file (2.11 kB).
code/__pycache__/deepseek_vaule.cpython-311.pyc ADDED
Binary file (10 kB).
code/__pycache__/reward.cpython-311.pyc ADDED
Binary file (5.73 kB).
code/__pycache__/train_nessary.cpython-311.pyc ADDED
Binary file (8.72 kB).
code/data_process.py ADDED
@@ -0,0 +1,62 @@
+ import re
+
+ def contains_chinese(text):
+     """
+     The Unicode range \u4e00-\u9fff covers the common CJK ideographs
+     """
+     return re.search(r'[\u4e00-\u9fff]', text) is not None
+
+ def process_lyrics(text):
+     """
+     Clean the lyric text:
+     1. split on '/'
+     2. drop surrounding whitespace and empty lines
+     3. drop lines without Chinese characters (treated as English)
+     4. de-duplicate while preserving the original order
+     """
+     # Split on '/' to get the list of lyric lines
+     lyrics = text.split('/')
+     processed = []
+     seen = set()
+
+     for line in lyrics:
+         # Strip surrounding whitespace
+         line = line.strip()
+         # Skip empty lines
+         if not line:
+             continue
+         # No Chinese characters: treat as an English line and skip it
+         if not contains_chinese(line):
+             continue
+         if len(line) < 3:
+             continue
+         # De-duplicate: keep a line only the first time it appears
+         if line not in seen:
+             seen.add(line)
+             processed.append(line)
+
+     return processed
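+ # Example: process_lyrics("Hello/你好吗/你好吗/啦") returns ["你好吗"]: the English line,
+ # the duplicate, and the too-short line are all filtered out.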
+
+ def main():
+     input_filename = 'data/lyrics.txt'
+     output_filename = 'data/processed_data.txt'
+
+     # Read the raw data file; utf-8 is the safe choice here
+     with open(input_filename, 'r', encoding='utf-8') as f:
+         content = f.read()
+
+     # Clean the lyrics
+     processed = process_lyrics(content)
+
+     # Re-join the cleaned lines with '/'; one line per row would also work
+     output_content = '/'.join(processed)
+
+     # Write the result
+     with open(output_filename, 'w', encoding='utf-8') as f:
+         f.write(output_content)
+
+     print(f'Done; the result is saved in {output_filename}')
+
+
+ if __name__ == '__main__':
+     main()
code/deepseek_vaule.py ADDED
@@ -0,0 +1,187 @@
+ import os
+ import openai
+ from openai import APIError
+ from typing import Dict, List, Union
+
+ # Custom exceptions
+ class InsufficientBalanceError(Exception):
+     pass
+
+ class EvaluationError(Exception):
+     pass
+
+ # System prompt with the detailed scoring rubric (kept in Chinese because the judge
+ # model scores Chinese lyrics)
+ SYS_PROMPT = """你是一个专业的文本质量评估专家。请根据以下标准对文本进行评分(满分10分):
+ 1. 创意性(权重25%): 内容的原创性和新颖性
+ 2. 文采(权重25%): 语言表达的优美程度和修辞手法
+ 3. 格式(权重25%): 结构清晰度、可读性和符合要求的格式
+ 4. 长度(权重25%): 内容长度是否适中(50-300字为佳)
+ 5. 总分(根据四个维度进行加权计算)
+
+ 评分要求:
+ - 使用表格形式输出,得到得分表格.
+ - 每项评分保留1位小数
+ - 最后简要对目标文本的评价,而不是让你自己再写一个,切记
+ """
+
+ def evaluate_text_quality(
+     text: str,
+     api_key: str = None,
+     model: str = "deepseek-chat",
+     temperature: float = 0.3,
+     max_tokens: int = 300
+ ) -> Dict[str, Union[float, str]]:
+     # Resolve the API key
+     api_key = api_key or os.getenv("DEEPSEEK_API_KEY")
+     if not api_key:
+         raise ValueError("No DeepSeek API key provided")
+
+     # Build the client
+     client = openai.OpenAI(
+         api_key=api_key,
+         base_url="https://api.deepseek.com/v1"
+     )
+
+     try:
+         # Call the API
+         response = client.chat.completions.create(
+             model=model,
+             messages=[
+                 {"role": "system", "content": SYS_PROMPT},
+                 {"role": "user", "content": text}
+             ],
+             temperature=temperature,
+             max_tokens=max_tokens,
+             stream=False
+         )
+
+         # Parse the response
+         output = response.choices[0].message.content.strip()
+
+         # Extract the scores from the API output
+         return parse_evaluation_result(output)
+
+     except APIError as e:
+         if e.status_code == 402:  # 402 is taken to mean insufficient balance
+             raise InsufficientBalanceError("API balance exhausted; please top up") from e
+         else:
+             raise EvaluationError(f"API error [{e.status_code}]: {e.message}") from e
+     except Exception as e:
+         raise EvaluationError(f"Evaluation failed: {str(e)}") from e
+
+ def parse_evaluation_result(output: str) -> Dict[str, Union[float, str]]:
+     """
+     Parse the evaluation output; handles Chinese score tables
+     """
+     result = {
+         "scores": {
+             "creativity": 0.0,
+             "language": 0.0,
+             "format": 0.0,
+             "length": 0.0,
+             "total": 0.0
+         },
+         "evaluation": output  # fall back to the full output
+     }
+
+     # Table parsing
+     lines = [line.strip() for line in output.split('\n') if line.strip()]
+
+     for line in lines:
+         # Creativity score
+         if "创意性" in line:
+             result["scores"]["creativity"] = extract_score_from_line(line)
+         # Diction score
+         elif any(key in line for key in ["文采", "语言表达"]):
+             result["scores"]["language"] = extract_score_from_line(line)
+         # Format score
+         elif "格式" in line:
+             result["scores"]["format"] = extract_score_from_line(line)
+         # Length score
+         elif "长度" in line:
+             result["scores"]["length"] = extract_score_from_line(line)
+         # Total score
+         elif any(key in line for key in ["总分", "总计", "平均"]):
+             result["scores"]["total"] = extract_score_from_line(line)
+
+     # Extract the free-text comment (everything after "评价:" and the like)
+     evaluation_lines = []
+     found_evaluation = False
+     for line in lines:
+         if any(prefix in line for prefix in ["评价:", "评语:", "总结:"]):
+             found_evaluation = True
+             line = line.split(":", 1)[-1].strip()
+         if found_evaluation and line:
+             evaluation_lines.append(line)
+
+     if evaluation_lines:
+         result["evaluation"] = "\n".join(evaluation_lines)
+
+     return result
+
+ def extract_score_from_line(line: str) -> float:
+     """
+     Pull a numeric score out of a line; handles several table formats
+     """
+     try:
+         # Handle "| 创意性 | 8.5 |" style rows
+         if "|" in line:
+             parts = [p.strip() for p in line.split("|") if p.strip()]
+             for part in parts:
+                 if part.replace('.', '').isdigit():
+                     return float(part)
+
+         # Handle "创意性: 8.5" style rows (ASCII or full-width colon)
+         if ":" in line or ":" in line:
+             parts = line.split(":", 1) if ":" in line else line.split(":", 1)
+             num_part = parts[-1].strip()
+             for s in num_part.split():
+                 s = s.replace('/', '').replace('分', '')
+                 if s.replace('.', '').isdigit():
+                     return float(s)
+
+         # Fall back to scanning for any number
+         for word in line.split():
+             word = word.replace('分', '').replace('/', '')
+             if word.replace('.', '').isdigit():
+                 return float(word)
+
+     except (ValueError, IndexError):
+         pass
+
+     return 0.0
+
+
+ def print_evaluation_result(
+     evaluation: Dict[str, Union[float, str]],
+     show_details: bool = True,
+     score_only: bool = False
+ ) -> None:
+     """
+     Pretty-print an evaluation result
+
+     Args:
+         evaluation: the dict returned by evaluate_text_quality
+         show_details: whether to print the free-text comment
+         score_only: print scores only (takes precedence over show_details)
+     """
+     if not evaluation:
+         print("No valid evaluation result")
+         return
+
+     scores = evaluation.get("scores", {})
+     evaluation_text = evaluation.get("evaluation", "")
+
+     # Score summary
+     print("\n=== Text quality evaluation ===")
+     print(f"[creativity] {scores.get('creativity', 0.0):.1f}/10")
+     print(f"[diction]    {scores.get('language', 0.0):.1f}/10")
+     print(f"[format]     {scores.get('format', 0.0):.1f}/10")
+     print(f"[length]     {scores.get('length', 0.0):.1f}/10")
+     print("-" * 25)
+     print(f"[total]      {scores.get('total', 0.0):.1f}/10")
+
+     # Optionally print the detailed comment
+     if not score_only and show_details and evaluation_text:
+         print("\n=== Detailed comment ===")
+         print(evaluation_text)
code/getCOT.py ADDED
@@ -0,0 +1,149 @@
+ import os
+ import openai
+ import threading
+ from concurrent.futures import ThreadPoolExecutor
+ from openai import APIError
+
+ API_KEY = os.getenv("DEEPSEEK_API_KEY", "your_api_key")
+
+ class ThreadSafeWriter:
+     """Thread-safe file writer"""
+     def __init__(self, output_path: str):
+         self.file = open(output_path, 'a+', encoding='utf-8')
+         self.lock = threading.Lock()
+         self.counter = 0
+
+     def write_line(self, content: str):
+         with self.lock:
+             self.file.write(content + '\n')
+             self.file.flush()
+             self.counter += 1
+
+     def get_progress(self):
+         with self.lock:
+             return self.counter
+
+     def close(self):
+         self.file.close()
+
+ class DeepSeekBatchProcessor:
+     def __init__(self, max_workers: int = 100):
+         self.client = openai.OpenAI(
+             api_key=API_KEY,
+             base_url="https://api.deepseek.com/v1"
+         )
+         self.max_workers = max_workers
+         self.error_flag = threading.Event()
+         self.rate_limiter = threading.Semaphore(20)
+
+     def process_batch(self, batch, writer: ThreadSafeWriter):
+         """Process a batch, one thread per task"""
+         futures = []
+         with ThreadPoolExecutor(max_workers=self.max_workers) as executor:
+             for line_num, line in batch:
+                 if self.error_flag.is_set():
+                     break
+                 futures.append(
+                     executor.submit(
+                         self._process_single_line,
+                         line_num,
+                         line,
+                         writer
+                     )
+                 )
+             for future in futures:
+                 future.result()
+
+     def _process_single_line(self, line_num: int, line: str, writer: ThreadSafeWriter):
+         if self.error_flag.is_set():
+             return
+
+         # Accept both the ASCII colon (:) and the full-width colon (:)
+         separator = None
+         if ':' in line:
+             separator = ':'
+         elif ':' in line:
+             separator = ':'
+
+         if not separator:
+             print(f"\nLine {line_num}: malformed")
+             writer.write_line(f"格式错误:{line}")
+             return
+
+         keywords_part, original_text = line.split(separator, 1)
+         # Keep only the keyword part (e.g. "风,雾,寂寞")
+         keywords = [kw.strip() for kw in keywords_part.split(",") if kw.strip()]
+         if not keywords:
+             keywords = ["无关键词"]
+
+         # Build the prompt: ask for a poem from the keywords
+         prompt = "请根据以下关键词写一首诗:" + ",".join(keywords)
+         messages = [{"role": "user", "content": prompt}]
+
+         retries = 0
+         while retries < 3 and not self.error_flag.is_set():
+             try:
+                 with self.rate_limiter:
+                     response = self.client.chat.completions.create(
+                         model="deepseek-reasoner",
+                         messages=messages,
+                         temperature=0.1
+                     )
+                 # Pull the reasoning and the poem out of the response
+                 reasoning_content = response.choices[0].message.reasoning_content.replace('\n', '').replace('\r', '')
+                 poem_original = response.choices[0].message.content.replace('\n', '/').replace('\r', '')
+                 # Assemble the final line: keywords<think>reasoning</think>:poem
+                 final_line = f"{','.join(keywords)}<think>{reasoning_content}</think>:{poem_original}"
+                 writer.write_line(final_line)
+                 progress = writer.get_progress()
+                 print(f"\rProcessed {progress} lines", end='')
+                 break
+
+             except APIError as e:
+                 if e.status_code == 402:
+                     print(f"\nLine {line_num} failed: API balance exhausted")
+                     self.error_flag.set()
+                     return
+                 elif e.status_code == 429:
+                     print(f"\nLine {line_num} rate-limited, retrying...")
+                     retries += 1
+                     if retries >= 3:
+                         print(f"\nLine {line_num}: retries exhausted")
+                 else:
+                     print(f"\nLine {line_num} API error [{e.status_code}]: {e.message}")
+                     return
+
+             except Exception as e:
+                 print(f"\nLine {line_num} raised: {str(e)}")
+                 retries += 1
+                 if retries >= 3:
+                     print(f"\nLine {line_num}: retries exhausted")
+
+         if retries >= 3 and not self.error_flag.is_set():
+             writer.write_line(f"处理失败:{line}")
+
+ def process_first_1000_lines(input_path: str, output_path: str, max_workers: int = 100):
+     """Read only the first 1000 lines and process them with the thread pool"""
+     processor = DeepSeekBatchProcessor(max_workers)
+     writer = ThreadSafeWriter(output_path)
+     batch = []
+     try:
+         with open(input_path, 'r', encoding='utf-8') as f:
+             for line_num, line in enumerate(f, 1):
+                 if not line.strip():
+                     continue
+                 batch.append((line_num, line.strip()))
+                 if line_num >= 1000:
+                     break
+
+         total = len(batch)
+         print(f"Total items: {total}")
+         processor.process_batch(batch, writer)
+         print("\nDone!")
+     finally:
+         writer.close()
+
+ if __name__ == '__main__':
+     input_file = "data/DSdata.txt"
+     output_file = "data/CoTdata.txt"
+     process_first_1000_lines(input_file, output_file, max_workers=100)
code/reward.py ADDED
@@ -0,0 +1,147 @@
+ import re
+ import numpy as np
+ from typing import List, Dict, Union, Optional
+ from sentence_transformers import SentenceTransformer, util
+ from multiprocessing import Pool, cpu_count
+
+ # Initialize the SentenceTransformer once at module level and move it to the GPU
+ embedder = SentenceTransformer('paraphrase-multilingual-MiniLM-L12-v2').to("cuda")
+
+
+ def compute_rewards(
+     completions: List[str],
+     min_len: Union[int, List[int]] = 100,
+     max_len: Union[int, List[int]] = 300,
+     weights: Union[tuple, List[tuple]] = (0.25, 0.25, 0.25, 0.25),
+     return_components: bool = False,
+     **kwargs
+ ) -> Union[List[float], Dict[str, List[float]]]:
+     """Reward computation, parallelized across samples"""
+     keywords = kwargs["keywords"]
+     n_samples = len(completions)
+
+     min_len = _to_list(min_len, n_samples)
+     max_len = _to_list(max_len, n_samples)
+     weights = _to_list(weights, n_samples)
+
+     # Compute the sub-rewards in parallel
+     with Pool(cpu_count()) as pool:
+         length_rewards = pool.starmap(_length_reward, zip(completions, min_len, max_len))
+         format_rewards = pool.map(_format_reward, completions)
+         keyword_rewards = _batch_keyword_reward(completions, keywords)  # this one runs on the GPU
+         language_rewards = pool.map(_language_reward, completions)
+
+     # Weighted sum of the sub-rewards
+     total_rewards = [
+         w[0] * lr + w[1] * fr + w[2] * kr + w[3] * lang_r
+         for w, lr, fr, kr, lang_r in zip(weights, length_rewards, format_rewards, keyword_rewards, language_rewards)
+     ]
+
+     if return_components:
+         return {
+             "rewards": total_rewards,
+             "length_rewards": length_rewards,
+             "format_rewards": format_rewards,
+             "keyword_rewards": keyword_rewards,
+             "language_rewards": language_rewards,
+         }
+     return total_rewards
+
+
+ # -------------- Parallel helpers --------------
+ def _to_list(val: Union[any, List[any]], n: int) -> List[any]:
+     """Broadcast a scalar to a per-sample list"""
+     return val if isinstance(val, list) else [val] * n
+
+
+ def _length_reward(text: str, min_len: int, max_len: int) -> float:
+     """Per-sample length reward"""
+     original = text.split("</think>:", 1)[1].strip() if "</think>:" in text else text.strip()
+     length = len(original)
+
+     if length < min_len:
+         return length / min_len + 1  # grows linearly from 1 to 2 as length approaches min_len
+     elif length > max_len:
+         return max_len / length + 1  # decays from 2 toward 1 as length grows past max_len
+     return 2.0
67
+
68
+
69
+ def _format_reward(text: str) -> float:
70
+ """单样本格式奖励"""
71
+ if "<think>" not in text or "</think>:" not in text:
72
+ return -2.0
73
+ think_content = text.split("<think>")[1].split("</think>")[0].strip()
74
+ return 2.0 if think_content else -2.0
75
+
76
+
77
+ def _batch_keyword_reward(texts: List[str], keywords_list: List[List[str]]) -> List[float]:
78
+ """批量关键词匹配(优化:使用 GPU 并行计算)"""
79
+ originals = [text.split("</think>:", 1)[1].strip() if "</think>:" in text else text.strip() for text in texts]
80
+ valid_indices = [i for i, orig in enumerate(originals) if orig and keywords_list[i]]
81
+
82
+ if not valid_indices:
83
+ return [0.8 if not kw else -2.0 for kw in keywords_list] # 无关键词时默认0.8
84
+
85
+ valid_originals = [originals[i] for i in valid_indices]
86
+ valid_keywords = [keywords_list[i] for i in valid_indices]
87
+
88
+ # 让计算在 GPU 上执行
89
+ original_embs = embedder.encode(valid_originals, convert_to_tensor=True)
90
+ keyword_embs = [embedder.encode(kw, convert_to_tensor=True) for kw in valid_keywords]
91
+
92
+ similarities = [
93
+ util.pytorch_cos_sim(orig_emb, kw_emb).mean().item()
94
+ for orig_emb, kw_emb in zip(original_embs, keyword_embs)
95
+ ]
96
+
97
+ # 分配奖励
98
+ rewards = []
99
+ sim_idx = 0
100
+ for i, kw in enumerate(keywords_list):
101
+ if i in valid_indices:
102
+ sim = similarities[sim_idx]
103
+ rewards.append(2.0 if sim >= 0.6 else (1.2 if sim >= 0.4 else 0.8))
104
+ sim_idx += 1
105
+ else:
106
+ rewards.append(0.8 if not kw else -2.0)
107
+ return rewards
108
+
109
+
110
+ def _language_reward(text: str) -> float:
111
+ """单样本语言奖励"""
112
+ chinese_chars = len(re.findall(r'[\u4e00-\u9fff]', text))
113
+ ratio = chinese_chars / max(1, len(text))
114
+
115
+ if ratio >= 0.9:
116
+ return 2.0
117
+ elif ratio >= 0.7:
118
+ return 1.4
119
+ return 0.7
120
+
121
+
122
+ # ------------ 运行示例 ------------
123
+ if __name__ == "__main__":
124
+ samples = [
125
+ "科技<think>技术创新是关键</think>:人工智能在医疗领域的应用正在改变诊断方式。",
126
+ "无效样本<think></think>:无意义内容",
127
+ "经济<think>宏观经济分析</think>:全球供应链重构对发展中国家影响深远。"
128
+ ]
129
+ keywords = [
130
+ ["科技", "人工智能"],
131
+ [], # 空关键词
132
+ ["经济", "供应链"]
133
+ ]
134
+
135
+ # 并行计算
136
+ rewards = compute_rewards(
137
+ completions=samples,
138
+ keywords=keywords,
139
+ min_len=[50, 10, 80],
140
+ return_components=True
141
+ )
142
+
143
+ print("总奖励:", rewards["rewards"])
144
+ print("长度奖励:", rewards["length_rewards"])
145
+ print("格式奖励:", rewards["format_rewards"])
146
+ print("关键词奖励:", rewards["keyword_rewards"])
147
+ print("语言奖励:", rewards["language_rewards"])
code/test.ipynb ADDED
@@ -0,0 +1,144 @@
+ {
+  "cells": [
+   {
+    "cell_type": "code",
+    "execution_count": null,
+    "id": "initial_id",
+    "metadata": {
+     "ExecuteTime": {
+      "end_time": "2025-04-02T06:42:56.681032Z",
+      "start_time": "2025-04-02T06:42:19.346090Z"
+     },
+     "collapsed": true
+    },
+    "outputs": [],
+    "source": [
+     "from transformers import AutoModelForCausalLM, AutoTokenizer\n",
+     "from peft import PeftModel\n",
+     "# 1. Load the base model and the LoRA adapter\n",
+     "base_model = AutoModelForCausalLM.from_pretrained(\"deepseek-ai/DeepSeek-R1-Distill-Qwen-1.5B\")  # GPU inference also works\n",
+     "model = PeftModel.from_pretrained(base_model, \"../DS_RL_model\")  # append .to(\"cuda\") for GPU-accelerated inference\n",
+     "tokenizer = AutoTokenizer.from_pretrained(\"deepseek-ai/DeepSeek-R1-Distill-Qwen-1.5B\")\n",
+     "tokenizer.pad_token = tokenizer.eos_token\n",
+     "\n"
+    ]
+   },
+   {
+    "cell_type": "code",
+    "execution_count": 5,
+    "id": "c805805aeaabd6a8",
+    "metadata": {
+     "ExecuteTime": {
+      "end_time": "2025-04-02T07:41:36.715675Z",
+      "start_time": "2025-04-02T07:41:27.640736Z"
+     }
+    },
+    "outputs": [
+     {
+      "name": "stdout",
+      "output_type": "stream",
+      "text": [
+       "模型输出: 根据以下关键词生成一首歌词,歌词中包含多个句子,确保句子通顺,诗意,格式正确.让我们一步一步的思考(思考过程包含在<think>和</think>之间):月光,欢乐,伊甸园,月光下的欢乐,小猪们、小羊们,月光下的欢乐。月光下的欢乐,小猪们、小羊们,月光下的欢乐,小猪们、小羊们,月光下的欢乐,月光下的欢乐,小猪们、小羊们,月光下的欢乐。月光,小猪们、小羊们,月光下的欢乐,月光下的欢乐,月光下的欢乐,月光下的欢乐,月光下的欢乐,月光下的欢乐,月光下的欢乐,月光下的欢乐,月光下的欢乐。\n",
+       "\n",
+       "嗯,我现在需要帮用户生成一首关于月光、欢乐、伊甸园的歌词。用户给了一个比较长的查询,里面有很多重复的句子,可能想要更简洁或者更流畅的歌词。我得先理解用户的需求,可能他们是在做一个儿童文学作品,或者是在学习如何创作歌词。\n",
+       "\n",
+       "首先,关键词有月光、欢乐、伊甸园、小猪、小羊。所以歌词里要包含这些元素。用户给出的回复里有很多重复,可能是因为想通过多个句子来强调主题,让读者更容易理解和记忆。\n",
+       "\n",
+       "我需要确保歌词结构合理,有起承转合,句子通顺。可能用户希望歌词有一定的押韵和节奏感,这样读起来更顺口。同时,格式要正确,可能需要遵循中文诗歌的格式,比如分句、押韵等。\n",
+       "\n",
+       "另外,用户提供的回复是多次重复的句子,可能是因为想强调月光下的欢乐,让读者感受到那种温馨和欢乐。我需要在生成歌词时,把这些元素自然地融入进去,而不是单纯地重复。\n",
+       "\n",
+       "我还得考虑歌词的情感基调,是欢快的还是带有感慨的。用户没有特别说明,但关键词中提到“欢乐”和“月光”,感觉偏向于积极向上的情感。\n",
+       "\n",
+       "可能需要避免过于复杂的结构,保持歌词简洁明了,同时有足够的意象来传达主题。比如,用“月光下的欢乐”这样的词句,可以增强画面感,让读者有身临其境的感觉。\n",
+       "\n",
+       "另外,用户提到了小猪和小羊,可能是在描绘一个小动物们的场景,或者是在描述一个充满欢乐的小世界。可能需要把这些元素融合在歌词中,让读者感受到那种温暖和快乐。\n",
+       "\n",
+       "我还需要注意押韵,虽然中文诗歌不一定严格押韵,但要有一定的节奏感。选择合适的结尾词来增强主题的表达。\n",
+       "\n",
+       "总的来说,我需要把月光、欢乐、小猪、小羊、伊甸园这几个元素有机地融入歌词中,确保结构合理,情感流畅,同时保持格式正确。可能需要多试几遍,调整用词和句式,直到满意为止。\n",
+       "</think>\n",
+       "\n",
+       "## 《月光下的欢乐》\n",
+       "\n",
+       "月光如水般温柔,\n",
+       "在掌心流淌着幸福的泪。\n",
+       "\n",
+       "小猪们、小羊们,\n",
+       "在伊甸园里跳跃舞。\n",
+       "月光下欢声笑语,\n",
+       "欢声笑语映照着我们的脸。\n",
+       "\n",
+       "月光下欢声笑语,\n",
+       "月光下欢声笑语,\n",
+       "月光下欢声笑语,\n",
+       "月光下欢声笑语。\n",
+       "\n",
+       "月光下欢声笑语,\n",
+       "月光下欢声笑语,\n",
+       "月光下欢声笑语,\n",
+       "月光下欢声笑语。\n",
+       "\n",
+       "月光下欢声笑语,\n",
+       "月光下欢声笑语,\n",
+       "月光下欢声笑语,\n",
+       "月光下欢声笑语。\n",
+       "\n",
+       "月光下欢声笑语,\n",
+       "月光下欢声笑语,\n",
+       "月光下欢声笑语,\n",
+       "月光下欢声笑语。\n",
+       "\n",
+       "月光下欢声笑语,\n",
+       "月光下欢声笑语,\n",
+       "月光下欢声笑语,\n",
+       "月光下欢声笑语。\n"
+      ]
+     }
+    ],
+    "source": [
+     "# 2. Prepare the prompt\n",
+     "prompt = \"根据以下关键词生成一首歌词,歌词中包含多个句子,确保句子通顺,诗意,格式正确.让我们一步一步的思考(思考过程包含在<think>和</think>之间):月光,欢乐,伊甸园\"\n",
+     "\n",
+     "# 3. Tokenize the prompt and generate a reply\n",
+     "inputs = tokenizer(prompt, return_tensors=\"pt\")\n",
+     "\n",
+     "# 4. Generation settings\n",
+     "outputs = model.generate(\n",
+     "    input_ids=inputs[\"input_ids\"],\n",
+     "    attention_mask=inputs[\"attention_mask\"],\n",
+     "    max_new_tokens=2048,  # maximum number of generated tokens\n",
+     "    do_sample=True,  # enable random sampling\n",
+     "    temperature=0.9,  # controls randomness (0.1-1.0)\n",
+     "    top_p=0.9,  # nucleus sampling parameter\n",
+     "    pad_token_id=tokenizer.eos_token_id\n",
+     ")\n",
+     "\n",
+     "# 5. Decode and print the result\n",
+     "generated_text = tokenizer.decode(outputs[0], skip_special_tokens=True)\n",
+     "print(\"模型输出:\", generated_text)"
+    ]
+   }
+  ],
+  "metadata": {
+   "kernelspec": {
+    "display_name": ".venv",
+    "language": "python",
+    "name": "python3"
+   },
+   "language_info": {
+    "codemirror_mode": {
+     "name": "ipython",
+     "version": 3
+    },
+    "file_extension": ".py",
+    "mimetype": "text/x-python",
+    "name": "python",
+    "nbconvert_exporter": "python",
+    "pygments_lexer": "ipython3",
+    "version": "3.11.3"
+   }
+  },
+  "nbformat": 4,
+  "nbformat_minor": 5
+ }
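Since `compute_rewards` takes raw completion strings, the notebook's `generated_text` can be scored directly with the same reward used for RL training. A short sketch, assuming `reward.py` is importable from the notebook's working directory and a CUDA device is available (the embedder moves to the GPU at import):

```python
from reward import compute_rewards

# Score the single generated sample; keywords mirror the prompt above.
scores = compute_rewards(
    completions=[generated_text],
    keywords=[["月光", "欢乐", "伊甸园"]],
    return_components=True,
)
print(scores["rewards"], scores["format_rewards"])
```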
code/threads_data_extract.py ADDED
@@ -0,0 +1,194 @@
+ import os
+ import openai
+ import threading
+ from concurrent.futures import ThreadPoolExecutor
+ from openai import APIError
+ from typing import List, Tuple
+
+ class FormatValidator:
+     """Validator for the output data format"""
+     @staticmethod
+     def validate_line(keywords: List[str], original: str) -> str:
+         """
+         Output format: keyword1,keyword2,keyword3:original text (full-width colon separator)
+         """
+         # Strip illegal characters from the keywords
+         cleaned_keywords = [
+             kw.strip().replace(':', '').replace('\n', '')[:10]  # cap keyword length at 10 chars
+             for kw in keywords if kw.strip()
+         ][:3]  # keep at most the first 3 keywords
+
+         # Handle the no-keyword case
+         if not cleaned_keywords:
+             keywords_str = "无关键词"  # placeholder written into the data file
+         else:
+             keywords_str = ",".join(cleaned_keywords)
+
+         # Remove newlines from the original text
+         cleaned_original = original.strip().replace('\n', ' ')
+         return f"{keywords_str}:{cleaned_original}"
+
+ class ThreadSafeWriter:
+     """Thread-safe writer with progress tracking"""
+     def __init__(self, output_path: str):
+         self.file = open(output_path, 'a+', encoding='utf-8')
+         self.lock = threading.Lock()
+         self.counter = 0  # lines written so far
+
+     def write_line(self, content: str):
+         with self.lock:
+             self.file.write(content + '\n')
+             self.file.flush()
+             self.counter += 1
+
+     def get_progress(self):
+         with self.lock:
+             return self.counter
+
+     def close(self):
+         self.file.close()
+
+ class DeepSeekBatchProcessor:
+     def __init__(self, max_workers: int = 100):
+         self.client = openai.OpenAI(
+             api_key=os.getenv("DEEPSEEK_API_KEY"),  # read the key from the environment; never commit API keys
+             base_url="https://api.deepseek.com"
+         )
+         self.max_workers = max_workers
+         self.error_flag = threading.Event()
+         self.rate_limiter = threading.Semaphore(20)  # caps concurrent API requests
+
+     def process_batch(self, batch: List[Tuple[int, str]], writer: ThreadSafeWriter):
+         """Process one batch, preserving submission order"""
+         futures = []
+         with ThreadPoolExecutor(max_workers=self.max_workers) as executor:
+             for line_num, original in batch:
+                 if self.error_flag.is_set():
+                     break
+                 futures.append(
+                     executor.submit(
+                         self._process_single_line,
+                         line_num,
+                         original,
+                         writer
+                     )
+                 )
+
+             # Wait for the current batch to finish
+             for future in futures:
+                 future.result()
+
+     def _process_single_line(self, line_num: int, original: str, writer: ThreadSafeWriter):
+         if self.error_flag.is_set():
+             return
+
+         retries = 0
+         while retries < 3 and not self.error_flag.is_set():
+             try:
+                 with self.rate_limiter:
+                     response = self.client.chat.completions.create(
+                         model="deepseek-reasoner",
+                         messages=[
+                             {"role": "system", "content": self._get_prompt()},
+                             {"role": "user", "content": original}
+                         ],
+                         temperature=0.1,
+                         max_tokens=30
+                     )
+
+                 # Parse the response
+                 keywords = self._parse_response(response)
+                 formatted_line = FormatValidator.validate_line(keywords, original)
+                 writer.write_line(formatted_line)
+
+                 # Update progress
+                 progress = writer.get_progress()
+                 print(f"\rProcessed {progress} lines", end='')
+
+                 break  # success: leave the retry loop
+
+             except APIError as e:
+                 if e.status_code == 402:  # insufficient balance
+                     print(f"\nLine {line_num} failed: insufficient API balance")
+                     self.error_flag.set()
+                     return
+                 elif e.status_code == 429:  # rate limited
+                     print(f"\nLine {line_num} rate-limited, retrying...")
+                     retries += 1
+                     if retries >= 3:
+                         print(f"Line {line_num}: retries exhausted")
+                 else:
+                     print(f"\nLine {line_num} API error [{e.status_code}]: {e.message}")
+                     return  # do not retry other API errors
+
+             except Exception as e:
+                 print(f"\nLine {line_num} processing error: {e}")
+                 retries += 1
+                 if retries >= 3:
+                     print(f"Line {line_num}: retries exhausted")
+
+         # All retries failed
+         if retries >= 3 and not self.error_flag.is_set():
+             writer.write_line(f"处理失败:{original}")  # record the failed line
+
+     @staticmethod
+     def _get_prompt() -> str:
+         """System prompt for keyword extraction."""
+         return ""  # the prompt text itself was not included in the uploaded file
+
+     @staticmethod
+     def _parse_response(response) -> List[str]:
+         content = response.choices[0].message.content.strip()
+         # Normalize full-width commas, split, and strip stray punctuation
+         return [kw.strip("。、") for kw in content.replace(',', ',').split(',') if kw]
+
+ def process_large_file(
+     input_path: str,
+     output_path: str,
+     batch_size: int = 500,
+     max_workers: int = 100
+ ):
+     """Entry point for large-file processing"""
+     # Initialize components
+     processor = DeepSeekBatchProcessor(max_workers)
+     writer = ThreadSafeWriter(output_path)
+
+     try:
+         # Read the input and build batches
+         with open(input_path, 'r', encoding='utf-8') as f:
+             # Batches of (line number, content) pairs
+             batches = []
+             current_batch = []
+             for line_num, line in enumerate(f, 1):
+                 if line.strip():
+                     current_batch.append((line_num, line.strip()))
+                 if len(current_batch) >= batch_size:
+                     batches.append(current_batch)
+                     current_batch = []
+             if current_batch:
+                 batches.append(current_batch)
+
+         # Process batch by batch (batch order is preserved)
+         total = sum(len(b) for b in batches)
+         print(f"Total examples: {total}")
+
+         for batch in batches:
+             if processor.error_flag.is_set():
+                 break
+             processor.process_batch(batch, writer)
+
+         print("\nDone!")
+
+     finally:
+         writer.close()
+
+ if __name__ == '__main__':
+     # File path configuration
+     input_file = "data/DSdata.txt"
+     output_file = "data/CoTdata.txt"
+
+     # Start processing
+     process_large_file(
+         input_path=input_file,
+         output_path=output_file,
+         batch_size=500,
+         max_workers=100
+     )
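One design note: `threading.Semaphore(20)` bounds how many requests are in flight at once, not requests per second. If a true requests-per-second cap were needed, a pacing limiter could be dropped in as the same context manager; a sketch under that assumption (the class is illustrative, not part of this upload):

```python
import threading
import time

class RateLimiter:
    """Pacing limiter: allows at most `rate` acquisitions per second (sketch)."""
    def __init__(self, rate: float):
        self.interval = 1.0 / rate
        self.lock = threading.Lock()
        self.next_time = time.monotonic()

    def __enter__(self):
        # Reserve the next available slot under the lock, then sleep outside it.
        with self.lock:
            now = time.monotonic()
            wait = self.next_time - now
            self.next_time = max(now, self.next_time) + self.interval
        if wait > 0:
            time.sleep(wait)

    def __exit__(self, *exc):
        return False
```

With this, `self.rate_limiter = RateLimiter(rate=20)` would pace calls at roughly 20 per second while keeping the `with self.rate_limiter:` usage unchanged.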
requirements.txt ADDED
@@ -0,0 +1,16 @@
+ PyQt5>=5.15
+ transformers>=4.30
+ peft>=0.15
+ torch>=2.0
+ numpy
+ matplotlib
+ jupyter
+ trl
+ datasets
+ accelerate
+ safetensors
+ scipy
+ tqdm
+ tensorboard
+ sentence-transformers
+ openai