Student0809 commited on Jul 24, 2025

Commit

e791fa3

verified ·

1 Parent(s): 822b8fc

Add files using upload-large-folder tool

Browse files

Files changed (20) hide show

.github/ISSUE_TEMPLATE/feature_request.md +17 -0
.ipynb_checkpoints/4JOB_TRAIN-checkpoint.jsonl +0 -0
.ipynb_checkpoints/HM_TEST-checkpoint.jsonl +44 -0
.ipynb_checkpoints/checkMissing-checkpoint.py +86 -0
.ipynb_checkpoints/cotSFT_gemini-checkpoint.json +0 -0
.ipynb_checkpoints/count_audios-checkpoint.py +69 -0
.ipynb_checkpoints/filter_duration-checkpoint.py +129 -0
.ipynb_checkpoints/gen_data-checkpoint.py +206 -0
.ipynb_checkpoints/intersect_jsonl-checkpoint.py +39 -0
.ipynb_checkpoints/maketar-checkpoint.py +35 -0
.ipynb_checkpoints/merge_lora-checkpoint.sh +5 -0
.ipynb_checkpoints/requirements-checkpoint.txt +1 -0
.ipynb_checkpoints/setup-checkpoint.py +165 -0
.ipynb_checkpoints/test_qwenOmni-checkpoint.py +177 -0
.ipynb_checkpoints/train-checkpoint.sh +64 -0
4JOB/.ipynb_checkpoints/process_overlaps-checkpoint.py +106 -0
4JOB/filter.py +132 -0
4JOB/process_overlaps.py +106 -0
4JOB/process_silence.py +84 -0
4JOB/process_transcription.py +80 -0

.github/ISSUE_TEMPLATE/feature_request.md ADDED Viewed

	@@ -0,0 +1,17 @@

+---
+name: Feature request
+about: Suggest an idea for this project
+title: ''
+labels: ''
+assignees: ''
+---
+**Describe the feature**
+Please describe the feature requested here(请在这里描述需求)
+**Paste any useful information**
+Paste any useful information, including papers, github links, etc.(请在这里描述其他有用的信息，比如相关的论文地址，github链接等)
+**Additional context**
+Add any other context or information here(其他信息可以写在这里)

.ipynb_checkpoints/4JOB_TRAIN-checkpoint.jsonl ADDED Viewed

The diff for this file is too large to render. See raw diff

.ipynb_checkpoints/HM_TEST-checkpoint.jsonl ADDED Viewed

	@@ -0,0 +1,44 @@

+{"messages": [{"role": "user", "content": "<audio># Interactional Dialogue Evaluation\n\n**IMPORTANT**: Evaluation must include `<response think>` and `<fluency think>` analysis and `<overall score>` rating.\nListen to a two-person interactional dialogue speech (Dual-channel audio, with each channel representing one speaker), labeled as speakers A and B. Evaluate the quality of the interaction, focusing on:\n**Response Relevance:** \n**logical consistency, topic coherence**\n**Interactional Fluency:**\n**Detect and evaluate extended vocal overlaps, e.g., cross-channel overlap.**\n**Detect and evaluate long pauses, e.g., pauses more than 3s between speaker turns.\n\n****Note**: Small pauses and brief overlaps in audio are acceptable, while prolonged pauses and overlapping audio are harmful. You should consider Response Relevance and Interactional Fluency separately, and provide the corresponding thinking process.\n\n## Scoring Criteria\nAssign a single holistic score based on the combined evaluation:\n`1` (Poor): Significant issues in either  **Response Relevance ** or  **Interactional Fluency. **\n`2` (Excellent): Both **Response Relevance ** and  **Interactional Fluency ** are consistently appropriate and natural.\n## Evaluation Output Format:\nStrictly follow this template:\n<response think>\n[Analysing Response Relevance and giving reasons for scoring...]\n</response think>\n<fluency think>\n[Analysing Interactional Fluency and giving reasons for scoring.]\n</fluency think>\n<overall score>X</overall score>\n"}], "audios": ["/root/autodl-tmp/wavrewardDataset/conversations/data/testdata/predict_result_mission4/audios/第16开始txt不规范/001.wav"], "solution": 2}
+{"messages": [{"role": "user", "content": "<audio># Interactional Dialogue Evaluation\n\n**IMPORTANT**: Evaluation must include `<response think>` and `<fluency think>` analysis and `<overall score>` rating.\nListen to a two-person interactional dialogue speech (Dual-channel audio, with each channel representing one speaker), labeled as speakers A and B. Evaluate the quality of the interaction, focusing on:\n**Response Relevance:** \n**logical consistency, topic coherence**\n**Interactional Fluency:**\n**Detect and evaluate extended vocal overlaps, e.g., cross-channel overlap.**\n**Detect and evaluate long pauses, e.g., pauses more than 3s between speaker turns.\n\n****Note**: Small pauses and brief overlaps in audio are acceptable, while prolonged pauses and overlapping audio are harmful. You should consider Response Relevance and Interactional Fluency separately, and provide the corresponding thinking process.\n\n## Scoring Criteria\nAssign a single holistic score based on the combined evaluation:\n`1` (Poor): Significant issues in either  **Response Relevance ** or  **Interactional Fluency. **\n`2` (Excellent): Both **Response Relevance ** and  **Interactional Fluency ** are consistently appropriate and natural.\n## Evaluation Output Format:\nStrictly follow this template:\n<response think>\n[Analysing Response Relevance and giving reasons for scoring...]\n</response think>\n<fluency think>\n[Analysing Interactional Fluency and giving reasons for scoring.]\n</fluency think>\n<overall score>X</overall score>\n"}], "audios": ["/root/autodl-tmp/wavrewardDataset/conversations/data/testdata/predict_result_mission4/audios/第16开始txt不规范/002.wav"], "solution": 2}
+{"messages": [{"role": "user", "content": "<audio># Interactional Dialogue Evaluation\n\n**IMPORTANT**: Evaluation must include `<response think>` and `<fluency think>` analysis and `<overall score>` rating.\nListen to a two-person interactional dialogue speech (Dual-channel audio, with each channel representing one speaker), labeled as speakers A and B. Evaluate the quality of the interaction, focusing on:\n**Response Relevance:** \n**logical consistency, topic coherence**\n**Interactional Fluency:**\n**Detect and evaluate extended vocal overlaps, e.g., cross-channel overlap.**\n**Detect and evaluate long pauses, e.g., pauses more than 3s between speaker turns.\n\n****Note**: Small pauses and brief overlaps in audio are acceptable, while prolonged pauses and overlapping audio are harmful. You should consider Response Relevance and Interactional Fluency separately, and provide the corresponding thinking process.\n\n## Scoring Criteria\nAssign a single holistic score based on the combined evaluation:\n`1` (Poor): Significant issues in either  **Response Relevance ** or  **Interactional Fluency. **\n`2` (Excellent): Both **Response Relevance ** and  **Interactional Fluency ** are consistently appropriate and natural.\n## Evaluation Output Format:\nStrictly follow this template:\n<response think>\n[Analysing Response Relevance and giving reasons for scoring...]\n</response think>\n<fluency think>\n[Analysing Interactional Fluency and giving reasons for scoring.]\n</fluency think>\n<overall score>X</overall score>\n"}], "audios": ["/root/autodl-tmp/wavrewardDataset/conversations/data/testdata/predict_result_mission4/audios/第16开始txt不规范/003.wav"], "solution": 2}
+{"messages": [{"role": "user", "content": "<audio># Interactional Dialogue Evaluation\n\n**IMPORTANT**: Evaluation must include `<response think>` and `<fluency think>` analysis and `<overall score>` rating.\nListen to a two-person interactional dialogue speech (Dual-channel audio, with each channel representing one speaker), labeled as speakers A and B. Evaluate the quality of the interaction, focusing on:\n**Response Relevance:** \n**logical consistency, topic coherence**\n**Interactional Fluency:**\n**Detect and evaluate extended vocal overlaps, e.g., cross-channel overlap.**\n**Detect and evaluate long pauses, e.g., pauses more than 3s between speaker turns.\n\n****Note**: Small pauses and brief overlaps in audio are acceptable, while prolonged pauses and overlapping audio are harmful. You should consider Response Relevance and Interactional Fluency separately, and provide the corresponding thinking process.\n\n## Scoring Criteria\nAssign a single holistic score based on the combined evaluation:\n`1` (Poor): Significant issues in either  **Response Relevance ** or  **Interactional Fluency. **\n`2` (Excellent): Both **Response Relevance ** and  **Interactional Fluency ** are consistently appropriate and natural.\n## Evaluation Output Format:\nStrictly follow this template:\n<response think>\n[Analysing Response Relevance and giving reasons for scoring...]\n</response think>\n<fluency think>\n[Analysing Interactional Fluency and giving reasons for scoring.]\n</fluency think>\n<overall score>X</overall score>\n"}], "audios": ["/root/autodl-tmp/wavrewardDataset/conversations/data/testdata/predict_result_mission4/audios/第16开始txt不规范/004.wav"], "solution": 2}
+{"messages": [{"role": "user", "content": "<audio># Interactional Dialogue Evaluation\n\n**IMPORTANT**: Evaluation must include `<response think>` and `<fluency think>` analysis and `<overall score>` rating.\nListen to a two-person interactional dialogue speech (Dual-channel audio, with each channel representing one speaker), labeled as speakers A and B. Evaluate the quality of the interaction, focusing on:\n**Response Relevance:** \n**logical consistency, topic coherence**\n**Interactional Fluency:**\n**Detect and evaluate extended vocal overlaps, e.g., cross-channel overlap.**\n**Detect and evaluate long pauses, e.g., pauses more than 3s between speaker turns.\n\n****Note**: Small pauses and brief overlaps in audio are acceptable, while prolonged pauses and overlapping audio are harmful. You should consider Response Relevance and Interactional Fluency separately, and provide the corresponding thinking process.\n\n## Scoring Criteria\nAssign a single holistic score based on the combined evaluation:\n`1` (Poor): Significant issues in either  **Response Relevance ** or  **Interactional Fluency. **\n`2` (Excellent): Both **Response Relevance ** and  **Interactional Fluency ** are consistently appropriate and natural.\n## Evaluation Output Format:\nStrictly follow this template:\n<response think>\n[Analysing Response Relevance and giving reasons for scoring...]\n</response think>\n<fluency think>\n[Analysing Interactional Fluency and giving reasons for scoring.]\n</fluency think>\n<overall score>X</overall score>\n"}], "audios": ["/root/autodl-tmp/wavrewardDataset/conversations/data/testdata/predict_result_mission4/audios/第16开始txt不规范/005.wav"], "solution": 2}
+{"messages": [{"role": "user", "content": "<audio># Interactional Dialogue Evaluation\n\n**IMPORTANT**: Evaluation must include `<response think>` and `<fluency think>` analysis and `<overall score>` rating.\nListen to a two-person interactional dialogue speech (Dual-channel audio, with each channel representing one speaker), labeled as speakers A and B. Evaluate the quality of the interaction, focusing on:\n**Response Relevance:** \n**logical consistency, topic coherence**\n**Interactional Fluency:**\n**Detect and evaluate extended vocal overlaps, e.g., cross-channel overlap.**\n**Detect and evaluate long pauses, e.g., pauses more than 3s between speaker turns.\n\n****Note**: Small pauses and brief overlaps in audio are acceptable, while prolonged pauses and overlapping audio are harmful. You should consider Response Relevance and Interactional Fluency separately, and provide the corresponding thinking process.\n\n## Scoring Criteria\nAssign a single holistic score based on the combined evaluation:\n`1` (Poor): Significant issues in either  **Response Relevance ** or  **Interactional Fluency. **\n`2` (Excellent): Both **Response Relevance ** and  **Interactional Fluency ** are consistently appropriate and natural.\n## Evaluation Output Format:\nStrictly follow this template:\n<response think>\n[Analysing Response Relevance and giving reasons for scoring...]\n</response think>\n<fluency think>\n[Analysing Interactional Fluency and giving reasons for scoring.]\n</fluency think>\n<overall score>X</overall score>\n"}], "audios": ["/root/autodl-tmp/wavrewardDataset/conversations/data/testdata/predict_result_mission4/audios/第16开始txt不规范/006.wav"], "solution": 2}
+{"messages": [{"role": "user", "content": "<audio># Interactional Dialogue Evaluation\n\n**IMPORTANT**: Evaluation must include `<response think>` and `<fluency think>` analysis and `<overall score>` rating.\nListen to a two-person interactional dialogue speech (Dual-channel audio, with each channel representing one speaker), labeled as speakers A and B. Evaluate the quality of the interaction, focusing on:\n**Response Relevance:** \n**logical consistency, topic coherence**\n**Interactional Fluency:**\n**Detect and evaluate extended vocal overlaps, e.g., cross-channel overlap.**\n**Detect and evaluate long pauses, e.g., pauses more than 3s between speaker turns.\n\n****Note**: Small pauses and brief overlaps in audio are acceptable, while prolonged pauses and overlapping audio are harmful. You should consider Response Relevance and Interactional Fluency separately, and provide the corresponding thinking process.\n\n## Scoring Criteria\nAssign a single holistic score based on the combined evaluation:\n`1` (Poor): Significant issues in either  **Response Relevance ** or  **Interactional Fluency. **\n`2` (Excellent): Both **Response Relevance ** and  **Interactional Fluency ** are consistently appropriate and natural.\n## Evaluation Output Format:\nStrictly follow this template:\n<response think>\n[Analysing Response Relevance and giving reasons for scoring...]\n</response think>\n<fluency think>\n[Analysing Interactional Fluency and giving reasons for scoring.]\n</fluency think>\n<overall score>X</overall score>\n"}], "audios": ["/root/autodl-tmp/wavrewardDataset/conversations/data/testdata/predict_result_mission4/audios/第16开始txt不规范/007.wav"], "solution": 2}
+{"messages": [{"role": "user", "content": "<audio># Interactional Dialogue Evaluation\n\n**IMPORTANT**: Evaluation must include `<response think>` and `<fluency think>` analysis and `<overall score>` rating.\nListen to a two-person interactional dialogue speech (Dual-channel audio, with each channel representing one speaker), labeled as speakers A and B. Evaluate the quality of the interaction, focusing on:\n**Response Relevance:** \n**logical consistency, topic coherence**\n**Interactional Fluency:**\n**Detect and evaluate extended vocal overlaps, e.g., cross-channel overlap.**\n**Detect and evaluate long pauses, e.g., pauses more than 3s between speaker turns.\n\n****Note**: Small pauses and brief overlaps in audio are acceptable, while prolonged pauses and overlapping audio are harmful. You should consider Response Relevance and Interactional Fluency separately, and provide the corresponding thinking process.\n\n## Scoring Criteria\nAssign a single holistic score based on the combined evaluation:\n`1` (Poor): Significant issues in either  **Response Relevance ** or  **Interactional Fluency. **\n`2` (Excellent): Both **Response Relevance ** and  **Interactional Fluency ** are consistently appropriate and natural.\n## Evaluation Output Format:\nStrictly follow this template:\n<response think>\n[Analysing Response Relevance and giving reasons for scoring...]\n</response think>\n<fluency think>\n[Analysing Interactional Fluency and giving reasons for scoring.]\n</fluency think>\n<overall score>X</overall score>\n"}], "audios": ["/root/autodl-tmp/wavrewardDataset/conversations/data/testdata/predict_result_mission4/audios/第16开始txt不规范/008.wav"], "solution": 2}
+{"messages": [{"role": "user", "content": "<audio># Interactional Dialogue Evaluation\n\n**IMPORTANT**: Evaluation must include `<response think>` and `<fluency think>` analysis and `<overall score>` rating.\nListen to a two-person interactional dialogue speech (Dual-channel audio, with each channel representing one speaker), labeled as speakers A and B. Evaluate the quality of the interaction, focusing on:\n**Response Relevance:** \n**logical consistency, topic coherence**\n**Interactional Fluency:**\n**Detect and evaluate extended vocal overlaps, e.g., cross-channel overlap.**\n**Detect and evaluate long pauses, e.g., pauses more than 3s between speaker turns.\n\n****Note**: Small pauses and brief overlaps in audio are acceptable, while prolonged pauses and overlapping audio are harmful. You should consider Response Relevance and Interactional Fluency separately, and provide the corresponding thinking process.\n\n## Scoring Criteria\nAssign a single holistic score based on the combined evaluation:\n`1` (Poor): Significant issues in either  **Response Relevance ** or  **Interactional Fluency. **\n`2` (Excellent): Both **Response Relevance ** and  **Interactional Fluency ** are consistently appropriate and natural.\n## Evaluation Output Format:\nStrictly follow this template:\n<response think>\n[Analysing Response Relevance and giving reasons for scoring...]\n</response think>\n<fluency think>\n[Analysing Interactional Fluency and giving reasons for scoring.]\n</fluency think>\n<overall score>X</overall score>\n"}], "audios": ["/root/autodl-tmp/wavrewardDataset/conversations/data/testdata/predict_result_mission4/audios/第16开始txt不规范/009.wav"], "solution": 2}
+{"messages": [{"role": "user", "content": "<audio># Interactional Dialogue Evaluation\n\n**IMPORTANT**: Evaluation must include `<response think>` and `<fluency think>` analysis and `<overall score>` rating.\nListen to a two-person interactional dialogue speech (Dual-channel audio, with each channel representing one speaker), labeled as speakers A and B. Evaluate the quality of the interaction, focusing on:\n**Response Relevance:** \n**logical consistency, topic coherence**\n**Interactional Fluency:**\n**Detect and evaluate extended vocal overlaps, e.g., cross-channel overlap.**\n**Detect and evaluate long pauses, e.g., pauses more than 3s between speaker turns.\n\n****Note**: Small pauses and brief overlaps in audio are acceptable, while prolonged pauses and overlapping audio are harmful. You should consider Response Relevance and Interactional Fluency separately, and provide the corresponding thinking process.\n\n## Scoring Criteria\nAssign a single holistic score based on the combined evaluation:\n`1` (Poor): Significant issues in either  **Response Relevance ** or  **Interactional Fluency. **\n`2` (Excellent): Both **Response Relevance ** and  **Interactional Fluency ** are consistently appropriate and natural.\n## Evaluation Output Format:\nStrictly follow this template:\n<response think>\n[Analysing Response Relevance and giving reasons for scoring...]\n</response think>\n<fluency think>\n[Analysing Interactional Fluency and giving reasons for scoring.]\n</fluency think>\n<overall score>X</overall score>\n"}], "audios": ["/root/autodl-tmp/wavrewardDataset/conversations/data/testdata/predict_result_mission4/audios/第16开始txt不规范/010.wav"], "solution": 2}
+{"messages": [{"role": "user", "content": "<audio># Interactional Dialogue Evaluation\n\n**IMPORTANT**: Evaluation must include `<response think>` and `<fluency think>` analysis and `<overall score>` rating.\nListen to a two-person interactional dialogue speech (Dual-channel audio, with each channel representing one speaker), labeled as speakers A and B. Evaluate the quality of the interaction, focusing on:\n**Response Relevance:** \n**logical consistency, topic coherence**\n**Interactional Fluency:**\n**Detect and evaluate extended vocal overlaps, e.g., cross-channel overlap.**\n**Detect and evaluate long pauses, e.g., pauses more than 3s between speaker turns.\n\n****Note**: Small pauses and brief overlaps in audio are acceptable, while prolonged pauses and overlapping audio are harmful. You should consider Response Relevance and Interactional Fluency separately, and provide the corresponding thinking process.\n\n## Scoring Criteria\nAssign a single holistic score based on the combined evaluation:\n`1` (Poor): Significant issues in either  **Response Relevance ** or  **Interactional Fluency. **\n`2` (Excellent): Both **Response Relevance ** and  **Interactional Fluency ** are consistently appropriate and natural.\n## Evaluation Output Format:\nStrictly follow this template:\n<response think>\n[Analysing Response Relevance and giving reasons for scoring...]\n</response think>\n<fluency think>\n[Analysing Interactional Fluency and giving reasons for scoring.]\n</fluency think>\n<overall score>X</overall score>\n"}], "audios": ["/root/autodl-tmp/wavrewardDataset/conversations/data/testdata/predict_result_mission4/audios/第16开始txt不规范/011.wav"], "solution": 2}
+{"messages": [{"role": "user", "content": "<audio># Interactional Dialogue Evaluation\n\n**IMPORTANT**: Evaluation must include `<response think>` and `<fluency think>` analysis and `<overall score>` rating.\nListen to a two-person interactional dialogue speech (Dual-channel audio, with each channel representing one speaker), labeled as speakers A and B. Evaluate the quality of the interaction, focusing on:\n**Response Relevance:** \n**logical consistency, topic coherence**\n**Interactional Fluency:**\n**Detect and evaluate extended vocal overlaps, e.g., cross-channel overlap.**\n**Detect and evaluate long pauses, e.g., pauses more than 3s between speaker turns.\n\n****Note**: Small pauses and brief overlaps in audio are acceptable, while prolonged pauses and overlapping audio are harmful. You should consider Response Relevance and Interactional Fluency separately, and provide the corresponding thinking process.\n\n## Scoring Criteria\nAssign a single holistic score based on the combined evaluation:\n`1` (Poor): Significant issues in either  **Response Relevance ** or  **Interactional Fluency. **\n`2` (Excellent): Both **Response Relevance ** and  **Interactional Fluency ** are consistently appropriate and natural.\n## Evaluation Output Format:\nStrictly follow this template:\n<response think>\n[Analysing Response Relevance and giving reasons for scoring...]\n</response think>\n<fluency think>\n[Analysing Interactional Fluency and giving reasons for scoring.]\n</fluency think>\n<overall score>X</overall score>\n"}], "audios": ["/root/autodl-tmp/wavrewardDataset/conversations/data/testdata/predict_result_mission4/audios/第16开始txt不规范/012.wav"], "solution": 2}
+{"messages": [{"role": "user", "content": "<audio># Interactional Dialogue Evaluation\n\n**IMPORTANT**: Evaluation must include `<response think>` and `<fluency think>` analysis and `<overall score>` rating.\nListen to a two-person interactional dialogue speech (Dual-channel audio, with each channel representing one speaker), labeled as speakers A and B. Evaluate the quality of the interaction, focusing on:\n**Response Relevance:** \n**logical consistency, topic coherence**\n**Interactional Fluency:**\n**Detect and evaluate extended vocal overlaps, e.g., cross-channel overlap.**\n**Detect and evaluate long pauses, e.g., pauses more than 3s between speaker turns.\n\n****Note**: Small pauses and brief overlaps in audio are acceptable, while prolonged pauses and overlapping audio are harmful. You should consider Response Relevance and Interactional Fluency separately, and provide the corresponding thinking process.\n\n## Scoring Criteria\nAssign a single holistic score based on the combined evaluation:\n`1` (Poor): Significant issues in either  **Response Relevance ** or  **Interactional Fluency. **\n`2` (Excellent): Both **Response Relevance ** and  **Interactional Fluency ** are consistently appropriate and natural.\n## Evaluation Output Format:\nStrictly follow this template:\n<response think>\n[Analysing Response Relevance and giving reasons for scoring...]\n</response think>\n<fluency think>\n[Analysing Interactional Fluency and giving reasons for scoring.]\n</fluency think>\n<overall score>X</overall score>\n"}], "audios": ["/root/autodl-tmp/wavrewardDataset/conversations/data/testdata/predict_result_mission4/audios/第16开始txt不规范/013.wav"], "solution": 2}
+{"messages": [{"role": "user", "content": "<audio># Interactional Dialogue Evaluation\n\n**IMPORTANT**: Evaluation must include `<response think>` and `<fluency think>` analysis and `<overall score>` rating.\nListen to a two-person interactional dialogue speech (Dual-channel audio, with each channel representing one speaker), labeled as speakers A and B. Evaluate the quality of the interaction, focusing on:\n**Response Relevance:** \n**logical consistency, topic coherence**\n**Interactional Fluency:**\n**Detect and evaluate extended vocal overlaps, e.g., cross-channel overlap.**\n**Detect and evaluate long pauses, e.g., pauses more than 3s between speaker turns.\n\n****Note**: Small pauses and brief overlaps in audio are acceptable, while prolonged pauses and overlapping audio are harmful. You should consider Response Relevance and Interactional Fluency separately, and provide the corresponding thinking process.\n\n## Scoring Criteria\nAssign a single holistic score based on the combined evaluation:\n`1` (Poor): Significant issues in either  **Response Relevance ** or  **Interactional Fluency. **\n`2` (Excellent): Both **Response Relevance ** and  **Interactional Fluency ** are consistently appropriate and natural.\n## Evaluation Output Format:\nStrictly follow this template:\n<response think>\n[Analysing Response Relevance and giving reasons for scoring...]\n</response think>\n<fluency think>\n[Analysing Interactional Fluency and giving reasons for scoring.]\n</fluency think>\n<overall score>X</overall score>\n"}], "audios": ["/root/autodl-tmp/wavrewardDataset/conversations/data/testdata/predict_result_mission4/audios/第16开始txt不规范/014.wav"], "solution": 2}
+{"messages": [{"role": "user", "content": "<audio># Interactional Dialogue Evaluation\n\n**IMPORTANT**: Evaluation must include `<response think>` and `<fluency think>` analysis and `<overall score>` rating.\nListen to a two-person interactional dialogue speech (Dual-channel audio, with each channel representing one speaker), labeled as speakers A and B. Evaluate the quality of the interaction, focusing on:\n**Response Relevance:** \n**logical consistency, topic coherence**\n**Interactional Fluency:**\n**Detect and evaluate extended vocal overlaps, e.g., cross-channel overlap.**\n**Detect and evaluate long pauses, e.g., pauses more than 3s between speaker turns.\n\n****Note**: Small pauses and brief overlaps in audio are acceptable, while prolonged pauses and overlapping audio are harmful. You should consider Response Relevance and Interactional Fluency separately, and provide the corresponding thinking process.\n\n## Scoring Criteria\nAssign a single holistic score based on the combined evaluation:\n`1` (Poor): Significant issues in either  **Response Relevance ** or  **Interactional Fluency. **\n`2` (Excellent): Both **Response Relevance ** and  **Interactional Fluency ** are consistently appropriate and natural.\n## Evaluation Output Format:\nStrictly follow this template:\n<response think>\n[Analysing Response Relevance and giving reasons for scoring...]\n</response think>\n<fluency think>\n[Analysing Interactional Fluency and giving reasons for scoring.]\n</fluency think>\n<overall score>X</overall score>\n"}], "audios": ["/root/autodl-tmp/wavrewardDataset/conversations/data/testdata/predict_result_mission4/audios/第16开始txt不规范/015.wav"], "solution": 2}
+{"messages": [{"role": "user", "content": "<audio># Interactional Dialogue Evaluation\n\n**IMPORTANT**: Evaluation must include `<response think>` and `<fluency think>` analysis and `<overall score>` rating.\nListen to a two-person interactional dialogue speech (Dual-channel audio, with each channel representing one speaker), labeled as speakers A and B. Evaluate the quality of the interaction, focusing on:\n**Response Relevance:** \n**logical consistency, topic coherence**\n**Interactional Fluency:**\n**Detect and evaluate extended vocal overlaps, e.g., cross-channel overlap.**\n**Detect and evaluate long pauses, e.g., pauses more than 3s between speaker turns.\n\n****Note**: Small pauses and brief overlaps in audio are acceptable, while prolonged pauses and overlapping audio are harmful. You should consider Response Relevance and Interactional Fluency separately, and provide the corresponding thinking process.\n\n## Scoring Criteria\nAssign a single holistic score based on the combined evaluation:\n`1` (Poor): Significant issues in either  **Response Relevance ** or  **Interactional Fluency. **\n`2` (Excellent): Both **Response Relevance ** and  **Interactional Fluency ** are consistently appropriate and natural.\n## Evaluation Output Format:\nStrictly follow this template:\n<response think>\n[Analysing Response Relevance and giving reasons for scoring...]\n</response think>\n<fluency think>\n[Analysing Interactional Fluency and giving reasons for scoring.]\n</fluency think>\n<overall score>X</overall score>\n"}], "audios": ["/root/autodl-tmp/wavrewardDataset/conversations/data/testdata/predict_result_mission4/audios/第16开始txt不规范/016.wav"], "solution": 2}
+{"messages": [{"role": "user", "content": "<audio># Interactional Dialogue Evaluation\n\n**IMPORTANT**: Evaluation must include `<response think>` and `<fluency think>` analysis and `<overall score>` rating.\nListen to a two-person interactional dialogue speech (Dual-channel audio, with each channel representing one speaker), labeled as speakers A and B. Evaluate the quality of the interaction, focusing on:\n**Response Relevance:** \n**logical consistency, topic coherence**\n**Interactional Fluency:**\n**Detect and evaluate extended vocal overlaps, e.g., cross-channel overlap.**\n**Detect and evaluate long pauses, e.g., pauses more than 3s between speaker turns.\n\n****Note**: Small pauses and brief overlaps in audio are acceptable, while prolonged pauses and overlapping audio are harmful. You should consider Response Relevance and Interactional Fluency separately, and provide the corresponding thinking process.\n\n## Scoring Criteria\nAssign a single holistic score based on the combined evaluation:\n`1` (Poor): Significant issues in either  **Response Relevance ** or  **Interactional Fluency. **\n`2` (Excellent): Both **Response Relevance ** and  **Interactional Fluency ** are consistently appropriate and natural.\n## Evaluation Output Format:\nStrictly follow this template:\n<response think>\n[Analysing Response Relevance and giving reasons for scoring...]\n</response think>\n<fluency think>\n[Analysing Interactional Fluency and giving reasons for scoring.]\n</fluency think>\n<overall score>X</overall score>\n"}], "audios": ["/root/autodl-tmp/wavrewardDataset/conversations/data/testdata/predict_result_mission4/audios/第16开始txt不规范/017.wav"], "solution": 2}
+{"messages": [{"role": "user", "content": "<audio># Interactional Dialogue Evaluation\n\n**IMPORTANT**: Evaluation must include `<response think>` and `<fluency think>` analysis and `<overall score>` rating.\nListen to a two-person interactional dialogue speech (Dual-channel audio, with each channel representing one speaker), labeled as speakers A and B. Evaluate the quality of the interaction, focusing on:\n**Response Relevance:** \n**logical consistency, topic coherence**\n**Interactional Fluency:**\n**Detect and evaluate extended vocal overlaps, e.g., cross-channel overlap.**\n**Detect and evaluate long pauses, e.g., pauses more than 3s between speaker turns.\n\n****Note**: Small pauses and brief overlaps in audio are acceptable, while prolonged pauses and overlapping audio are harmful. You should consider Response Relevance and Interactional Fluency separately, and provide the corresponding thinking process.\n\n## Scoring Criteria\nAssign a single holistic score based on the combined evaluation:\n`1` (Poor): Significant issues in either  **Response Relevance ** or  **Interactional Fluency. **\n`2` (Excellent): Both **Response Relevance ** and  **Interactional Fluency ** are consistently appropriate and natural.\n## Evaluation Output Format:\nStrictly follow this template:\n<response think>\n[Analysing Response Relevance and giving reasons for scoring...]\n</response think>\n<fluency think>\n[Analysing Interactional Fluency and giving reasons for scoring.]\n</fluency think>\n<overall score>X</overall score>\n"}], "audios": ["/root/autodl-tmp/wavrewardDataset/conversations/data/testdata/predict_result_mission4/audios/第16开始txt不规范/018.wav"], "solution": 2}
+{"messages": [{"role": "user", "content": "<audio># Interactional Dialogue Evaluation\n\n**IMPORTANT**: Evaluation must include `<response think>` and `<fluency think>` analysis and `<overall score>` rating.\nListen to a two-person interactional dialogue speech (Dual-channel audio, with each channel representing one speaker), labeled as speakers A and B. Evaluate the quality of the interaction, focusing on:\n**Response Relevance:** \n**logical consistency, topic coherence**\n**Interactional Fluency:**\n**Detect and evaluate extended vocal overlaps, e.g., cross-channel overlap.**\n**Detect and evaluate long pauses, e.g., pauses more than 3s between speaker turns.\n\n****Note**: Small pauses and brief overlaps in audio are acceptable, while prolonged pauses and overlapping audio are harmful. You should consider Response Relevance and Interactional Fluency separately, and provide the corresponding thinking process.\n\n## Scoring Criteria\nAssign a single holistic score based on the combined evaluation:\n`1` (Poor): Significant issues in either  **Response Relevance ** or  **Interactional Fluency. **\n`2` (Excellent): Both **Response Relevance ** and  **Interactional Fluency ** are consistently appropriate and natural.\n## Evaluation Output Format:\nStrictly follow this template:\n<response think>\n[Analysing Response Relevance and giving reasons for scoring...]\n</response think>\n<fluency think>\n[Analysing Interactional Fluency and giving reasons for scoring.]\n</fluency think>\n<overall score>X</overall score>\n"}], "audios": ["/root/autodl-tmp/wavrewardDataset/conversations/data/testdata/predict_result_mission4/audios/第16开始txt不规范/019.wav"], "solution": 2}
+{"messages": [{"role": "user", "content": "<audio># Interactional Dialogue Evaluation\n\n**IMPORTANT**: Evaluation must include `<response think>` and `<fluency think>` analysis and `<overall score>` rating.\nListen to a two-person interactional dialogue speech (Dual-channel audio, with each channel representing one speaker), labeled as speakers A and B. Evaluate the quality of the interaction, focusing on:\n**Response Relevance:** \n**logical consistency, topic coherence**\n**Interactional Fluency:**\n**Detect and evaluate extended vocal overlaps, e.g., cross-channel overlap.**\n**Detect and evaluate long pauses, e.g., pauses more than 3s between speaker turns.\n\n****Note**: Small pauses and brief overlaps in audio are acceptable, while prolonged pauses and overlapping audio are harmful. You should consider Response Relevance and Interactional Fluency separately, and provide the corresponding thinking process.\n\n## Scoring Criteria\nAssign a single holistic score based on the combined evaluation:\n`1` (Poor): Significant issues in either  **Response Relevance ** or  **Interactional Fluency. **\n`2` (Excellent): Both **Response Relevance ** and  **Interactional Fluency ** are consistently appropriate and natural.\n## Evaluation Output Format:\nStrictly follow this template:\n<response think>\n[Analysing Response Relevance and giving reasons for scoring...]\n</response think>\n<fluency think>\n[Analysing Interactional Fluency and giving reasons for scoring.]\n</fluency think>\n<overall score>X</overall score>\n"}], "audios": ["/root/autodl-tmp/wavrewardDataset/conversations/data/testdata/predict_result_mission4/audios/第16开始txt不规范/020.wav"], "solution": 2}
+{"messages": [{"role": "user", "content": "<audio># Interactional Dialogue Evaluation\n\n**IMPORTANT**: Evaluation must include `<response think>` and `<fluency think>` analysis and `<overall score>` rating.\nListen to a two-person interactional dialogue speech (Dual-channel audio, with each channel representing one speaker), labeled as speakers A and B. Evaluate the quality of the interaction, focusing on:\n**Response Relevance:** \n**logical consistency, topic coherence**\n**Interactional Fluency:**\n**Detect and evaluate extended vocal overlaps, e.g., cross-channel overlap.**\n**Detect and evaluate long pauses, e.g., pauses more than 3s between speaker turns.\n\n****Note**: Small pauses and brief overlaps in audio are acceptable, while prolonged pauses and overlapping audio are harmful. You should consider Response Relevance and Interactional Fluency separately, and provide the corresponding thinking process.\n\n## Scoring Criteria\nAssign a single holistic score based on the combined evaluation:\n`1` (Poor): Significant issues in either  **Response Relevance ** or  **Interactional Fluency. **\n`2` (Excellent): Both **Response Relevance ** and  **Interactional Fluency ** are consistently appropriate and natural.\n## Evaluation Output Format:\nStrictly follow this template:\n<response think>\n[Analysing Response Relevance and giving reasons for scoring...]\n</response think>\n<fluency think>\n[Analysing Interactional Fluency and giving reasons for scoring.]\n</fluency think>\n<overall score>X</overall score>\n"}], "audios": ["/root/autodl-tmp/wavrewardDataset/conversations/data/testdata/predict_result_mission4/audios/第16开始txt不规范/021.wav"], "solution": 2}
+{"messages": [{"role": "user", "content": "<audio># Interactional Dialogue Evaluation\n\n**IMPORTANT**: Evaluation must include `<response think>` and `<fluency think>` analysis and `<overall score>` rating.\nListen to a two-person interactional dialogue speech (Dual-channel audio, with each channel representing one speaker), labeled as speakers A and B. Evaluate the quality of the interaction, focusing on:\n**Response Relevance:** \n**logical consistency, topic coherence**\n**Interactional Fluency:**\n**Detect and evaluate extended vocal overlaps, e.g., cross-channel overlap.**\n**Detect and evaluate long pauses, e.g., pauses more than 3s between speaker turns.\n\n****Note**: Small pauses and brief overlaps in audio are acceptable, while prolonged pauses and overlapping audio are harmful. You should consider Response Relevance and Interactional Fluency separately, and provide the corresponding thinking process.\n\n## Scoring Criteria\nAssign a single holistic score based on the combined evaluation:\n`1` (Poor): Significant issues in either  **Response Relevance ** or  **Interactional Fluency. **\n`2` (Excellent): Both **Response Relevance ** and  **Interactional Fluency ** are consistently appropriate and natural.\n## Evaluation Output Format:\nStrictly follow this template:\n<response think>\n[Analysing Response Relevance and giving reasons for scoring...]\n</response think>\n<fluency think>\n[Analysing Interactional Fluency and giving reasons for scoring.]\n</fluency think>\n<overall score>X</overall score>\n"}], "audios": ["/root/autodl-tmp/wavrewardDataset/conversations/data/testdata/predict_result_mission4/audios/第16开始txt不规范/022.wav"], "solution": 2}
+{"messages": [{"role": "user", "content": "<audio># Interactional Dialogue Evaluation\n\n**IMPORTANT**: Evaluation must include `<response think>` and `<fluency think>` analysis and `<overall score>` rating.\nListen to a two-person interactional dialogue speech (Dual-channel audio, with each channel representing one speaker), labeled as speakers A and B. Evaluate the quality of the interaction, focusing on:\n**Response Relevance:** \n**logical consistency, topic coherence**\n**Interactional Fluency:**\n**Detect and evaluate extended vocal overlaps, e.g., cross-channel overlap.**\n**Detect and evaluate long pauses, e.g., pauses more than 3s between speaker turns.\n\n****Note**: Small pauses and brief overlaps in audio are acceptable, while prolonged pauses and overlapping audio are harmful. You should consider Response Relevance and Interactional Fluency separately, and provide the corresponding thinking process.\n\n## Scoring Criteria\nAssign a single holistic score based on the combined evaluation:\n`1` (Poor): Significant issues in either  **Response Relevance ** or  **Interactional Fluency. **\n`2` (Excellent): Both **Response Relevance ** and  **Interactional Fluency ** are consistently appropriate and natural.\n## Evaluation Output Format:\nStrictly follow this template:\n<response think>\n[Analysing Response Relevance and giving reasons for scoring...]\n</response think>\n<fluency think>\n[Analysing Interactional Fluency and giving reasons for scoring.]\n</fluency think>\n<overall score>X</overall score>\n"}], "audios": ["/root/autodl-tmp/wavrewardDataset/conversations/data/testdata/predict_result_mission4/audios/xiaoyuaudios/xiaoyu1.wav"], "solution": 2}
+{"messages": [{"role": "user", "content": "<audio># Interactional Dialogue Evaluation\n\n**IMPORTANT**: Evaluation must include `<response think>` and `<fluency think>` analysis and `<overall score>` rating.\nListen to a two-person interactional dialogue speech (Dual-channel audio, with each channel representing one speaker), labeled as speakers A and B. Evaluate the quality of the interaction, focusing on:\n**Response Relevance:** \n**logical consistency, topic coherence**\n**Interactional Fluency:**\n**Detect and evaluate extended vocal overlaps, e.g., cross-channel overlap.**\n**Detect and evaluate long pauses, e.g., pauses more than 3s between speaker turns.\n\n****Note**: Small pauses and brief overlaps in audio are acceptable, while prolonged pauses and overlapping audio are harmful. You should consider Response Relevance and Interactional Fluency separately, and provide the corresponding thinking process.\n\n## Scoring Criteria\nAssign a single holistic score based on the combined evaluation:\n`1` (Poor): Significant issues in either  **Response Relevance ** or  **Interactional Fluency. **\n`2` (Excellent): Both **Response Relevance ** and  **Interactional Fluency ** are consistently appropriate and natural.\n## Evaluation Output Format:\nStrictly follow this template:\n<response think>\n[Analysing Response Relevance and giving reasons for scoring...]\n</response think>\n<fluency think>\n[Analysing Interactional Fluency and giving reasons for scoring.]\n</fluency think>\n<overall score>X</overall score>\n"}], "audios": ["/root/autodl-tmp/wavrewardDataset/conversations/data/testdata/predict_result_mission4/audios/xiaoyuaudios/xiaoyu2.wav"], "solution": 2}
+{"messages": [{"role": "user", "content": "<audio># Interactional Dialogue Evaluation\n\n**IMPORTANT**: Evaluation must include `<response think>` and `<fluency think>` analysis and `<overall score>` rating.\nListen to a two-person interactional dialogue speech (Dual-channel audio, with each channel representing one speaker), labeled as speakers A and B. Evaluate the quality of the interaction, focusing on:\n**Response Relevance:** \n**logical consistency, topic coherence**\n**Interactional Fluency:**\n**Detect and evaluate extended vocal overlaps, e.g., cross-channel overlap.**\n**Detect and evaluate long pauses, e.g., pauses more than 3s between speaker turns.\n\n****Note**: Small pauses and brief overlaps in audio are acceptable, while prolonged pauses and overlapping audio are harmful. You should consider Response Relevance and Interactional Fluency separately, and provide the corresponding thinking process.\n\n## Scoring Criteria\nAssign a single holistic score based on the combined evaluation:\n`1` (Poor): Significant issues in either  **Response Relevance ** or  **Interactional Fluency. **\n`2` (Excellent): Both **Response Relevance ** and  **Interactional Fluency ** are consistently appropriate and natural.\n## Evaluation Output Format:\nStrictly follow this template:\n<response think>\n[Analysing Response Relevance and giving reasons for scoring...]\n</response think>\n<fluency think>\n[Analysing Interactional Fluency and giving reasons for scoring.]\n</fluency think>\n<overall score>X</overall score>\n"}], "audios": ["/root/autodl-tmp/wavrewardDataset/conversations/data/testdata/predict_result_mission4/audios/xiaoyuaudios/xiaoyu3.wav"], "solution": 2}
+{"messages": [{"role": "user", "content": "<audio># Interactional Dialogue Evaluation\n\n**IMPORTANT**: Evaluation must include `<response think>` and `<fluency think>` analysis and `<overall score>` rating.\nListen to a two-person interactional dialogue speech (Dual-channel audio, with each channel representing one speaker), labeled as speakers A and B. Evaluate the quality of the interaction, focusing on:\n**Response Relevance:** \n**logical consistency, topic coherence**\n**Interactional Fluency:**\n**Detect and evaluate extended vocal overlaps, e.g., cross-channel overlap.**\n**Detect and evaluate long pauses, e.g., pauses more than 3s between speaker turns.\n\n****Note**: Small pauses and brief overlaps in audio are acceptable, while prolonged pauses and overlapping audio are harmful. You should consider Response Relevance and Interactional Fluency separately, and provide the corresponding thinking process.\n\n## Scoring Criteria\nAssign a single holistic score based on the combined evaluation:\n`1` (Poor): Significant issues in either  **Response Relevance ** or  **Interactional Fluency. **\n`2` (Excellent): Both **Response Relevance ** and  **Interactional Fluency ** are consistently appropriate and natural.\n## Evaluation Output Format:\nStrictly follow this template:\n<response think>\n[Analysing Response Relevance and giving reasons for scoring...]\n</response think>\n<fluency think>\n[Analysing Interactional Fluency and giving reasons for scoring.]\n</fluency think>\n<overall score>X</overall score>\n"}], "audios": ["/root/autodl-tmp/wavrewardDataset/conversations/data/testdata/predict_result_mission4/audios/xiaoyuaudios/xiaoyu4.wav"], "solution": 1}
+{"messages": [{"role": "user", "content": "<audio># Interactional Dialogue Evaluation\n\n**IMPORTANT**: Evaluation must include `<response think>` and `<fluency think>` analysis and `<overall score>` rating.\nListen to a two-person interactional dialogue speech (Dual-channel audio, with each channel representing one speaker), labeled as speakers A and B. Evaluate the quality of the interaction, focusing on:\n**Response Relevance:** \n**logical consistency, topic coherence**\n**Interactional Fluency:**\n**Detect and evaluate extended vocal overlaps, e.g., cross-channel overlap.**\n**Detect and evaluate long pauses, e.g., pauses more than 3s between speaker turns.\n\n****Note**: Small pauses and brief overlaps in audio are acceptable, while prolonged pauses and overlapping audio are harmful. You should consider Response Relevance and Interactional Fluency separately, and provide the corresponding thinking process.\n\n## Scoring Criteria\nAssign a single holistic score based on the combined evaluation:\n`1` (Poor): Significant issues in either  **Response Relevance ** or  **Interactional Fluency. **\n`2` (Excellent): Both **Response Relevance ** and  **Interactional Fluency ** are consistently appropriate and natural.\n## Evaluation Output Format:\nStrictly follow this template:\n<response think>\n[Analysing Response Relevance and giving reasons for scoring...]\n</response think>\n<fluency think>\n[Analysing Interactional Fluency and giving reasons for scoring.]\n</fluency think>\n<overall score>X</overall score>\n"}], "audios": ["/root/autodl-tmp/wavrewardDataset/conversations/data/testdata/predict_result_mission4/audios/xiaoyuaudios/xiaoyu5.wav"], "solution": 1}
+{"messages": [{"role": "user", "content": "<audio># Interactional Dialogue Evaluation\n\n**IMPORTANT**: Evaluation must include `<response think>` and `<fluency think>` analysis and `<overall score>` rating.\nListen to a two-person interactional dialogue speech (Dual-channel audio, with each channel representing one speaker), labeled as speakers A and B. Evaluate the quality of the interaction, focusing on:\n**Response Relevance:** \n**logical consistency, topic coherence**\n**Interactional Fluency:**\n**Detect and evaluate extended vocal overlaps, e.g., cross-channel overlap.**\n**Detect and evaluate long pauses, e.g., pauses more than 3s between speaker turns.\n\n****Note**: Small pauses and brief overlaps in audio are acceptable, while prolonged pauses and overlapping audio are harmful. You should consider Response Relevance and Interactional Fluency separately, and provide the corresponding thinking process.\n\n## Scoring Criteria\nAssign a single holistic score based on the combined evaluation:\n`1` (Poor): Significant issues in either  **Response Relevance ** or  **Interactional Fluency. **\n`2` (Excellent): Both **Response Relevance ** and  **Interactional Fluency ** are consistently appropriate and natural.\n## Evaluation Output Format:\nStrictly follow this template:\n<response think>\n[Analysing Response Relevance and giving reasons for scoring...]\n</response think>\n<fluency think>\n[Analysing Interactional Fluency and giving reasons for scoring.]\n</fluency think>\n<overall score>X</overall score>\n"}], "audios": ["/root/autodl-tmp/wavrewardDataset/conversations/data/testdata/predict_result_mission4/audios/duihua/duihua/001.wav"], "solution": 1}
+{"messages": [{"role": "user", "content": "<audio># Interactional Dialogue Evaluation\n\n**IMPORTANT**: Evaluation must include `<response think>` and `<fluency think>` analysis and `<overall score>` rating.\nListen to a two-person interactional dialogue speech (Dual-channel audio, with each channel representing one speaker), labeled as speakers A and B. Evaluate the quality of the interaction, focusing on:\n**Response Relevance:** \n**logical consistency, topic coherence**\n**Interactional Fluency:**\n**Detect and evaluate extended vocal overlaps, e.g., cross-channel overlap.**\n**Detect and evaluate long pauses, e.g., pauses more than 3s between speaker turns.\n\n****Note**: Small pauses and brief overlaps in audio are acceptable, while prolonged pauses and overlapping audio are harmful. You should consider Response Relevance and Interactional Fluency separately, and provide the corresponding thinking process.\n\n## Scoring Criteria\nAssign a single holistic score based on the combined evaluation:\n`1` (Poor): Significant issues in either  **Response Relevance ** or  **Interactional Fluency. **\n`2` (Excellent): Both **Response Relevance ** and  **Interactional Fluency ** are consistently appropriate and natural.\n## Evaluation Output Format:\nStrictly follow this template:\n<response think>\n[Analysing Response Relevance and giving reasons for scoring...]\n</response think>\n<fluency think>\n[Analysing Interactional Fluency and giving reasons for scoring.]\n</fluency think>\n<overall score>X</overall score>\n"}], "audios": ["/root/autodl-tmp/wavrewardDataset/conversations/data/testdata/predict_result_mission4/audios/duihua/duihua/002.wav"], "solution": 1}
+{"messages": [{"role": "user", "content": "<audio># Interactional Dialogue Evaluation\n\n**IMPORTANT**: Evaluation must include `<response think>` and `<fluency think>` analysis and `<overall score>` rating.\nListen to a two-person interactional dialogue speech (Dual-channel audio, with each channel representing one speaker), labeled as speakers A and B. Evaluate the quality of the interaction, focusing on:\n**Response Relevance:** \n**logical consistency, topic coherence**\n**Interactional Fluency:**\n**Detect and evaluate extended vocal overlaps, e.g., cross-channel overlap.**\n**Detect and evaluate long pauses, e.g., pauses more than 3s between speaker turns.\n\n****Note**: Small pauses and brief overlaps in audio are acceptable, while prolonged pauses and overlapping audio are harmful. You should consider Response Relevance and Interactional Fluency separately, and provide the corresponding thinking process.\n\n## Scoring Criteria\nAssign a single holistic score based on the combined evaluation:\n`1` (Poor): Significant issues in either  **Response Relevance ** or  **Interactional Fluency. **\n`2` (Excellent): Both **Response Relevance ** and  **Interactional Fluency ** are consistently appropriate and natural.\n## Evaluation Output Format:\nStrictly follow this template:\n<response think>\n[Analysing Response Relevance and giving reasons for scoring...]\n</response think>\n<fluency think>\n[Analysing Interactional Fluency and giving reasons for scoring.]\n</fluency think>\n<overall score>X</overall score>\n"}], "audios": ["/root/autodl-tmp/wavrewardDataset/conversations/data/testdata/predict_result_mission4/audios/duihua/duihua/003.wav"], "solution": 2}
+{"messages": [{"role": "user", "content": "<audio># Interactional Dialogue Evaluation\n\n**IMPORTANT**: Evaluation must include `<response think>` and `<fluency think>` analysis and `<overall score>` rating.\nListen to a two-person interactional dialogue speech (Dual-channel audio, with each channel representing one speaker), labeled as speakers A and B. Evaluate the quality of the interaction, focusing on:\n**Response Relevance:** \n**logical consistency, topic coherence**\n**Interactional Fluency:**\n**Detect and evaluate extended vocal overlaps, e.g., cross-channel overlap.**\n**Detect and evaluate long pauses, e.g., pauses more than 3s between speaker turns.\n\n****Note**: Small pauses and brief overlaps in audio are acceptable, while prolonged pauses and overlapping audio are harmful. You should consider Response Relevance and Interactional Fluency separately, and provide the corresponding thinking process.\n\n## Scoring Criteria\nAssign a single holistic score based on the combined evaluation:\n`1` (Poor): Significant issues in either  **Response Relevance ** or  **Interactional Fluency. **\n`2` (Excellent): Both **Response Relevance ** and  **Interactional Fluency ** are consistently appropriate and natural.\n## Evaluation Output Format:\nStrictly follow this template:\n<response think>\n[Analysing Response Relevance and giving reasons for scoring...]\n</response think>\n<fluency think>\n[Analysing Interactional Fluency and giving reasons for scoring.]\n</fluency think>\n<overall score>X</overall score>\n"}], "audios": ["/root/autodl-tmp/wavrewardDataset/conversations/data/testdata/predict_result_mission4/audios/duihua/duihua/004.wav"], "solution": 1}
+{"messages": [{"role": "user", "content": "<audio># Interactional Dialogue Evaluation\n\n**IMPORTANT**: Evaluation must include `<response think>` and `<fluency think>` analysis and `<overall score>` rating.\nListen to a two-person interactional dialogue speech (Dual-channel audio, with each channel representing one speaker), labeled as speakers A and B. Evaluate the quality of the interaction, focusing on:\n**Response Relevance:** \n**logical consistency, topic coherence**\n**Interactional Fluency:**\n**Detect and evaluate extended vocal overlaps, e.g., cross-channel overlap.**\n**Detect and evaluate long pauses, e.g., pauses more than 3s between speaker turns.\n\n****Note**: Small pauses and brief overlaps in audio are acceptable, while prolonged pauses and overlapping audio are harmful. You should consider Response Relevance and Interactional Fluency separately, and provide the corresponding thinking process.\n\n## Scoring Criteria\nAssign a single holistic score based on the combined evaluation:\n`1` (Poor): Significant issues in either  **Response Relevance ** or  **Interactional Fluency. **\n`2` (Excellent): Both **Response Relevance ** and  **Interactional Fluency ** are consistently appropriate and natural.\n## Evaluation Output Format:\nStrictly follow this template:\n<response think>\n[Analysing Response Relevance and giving reasons for scoring...]\n</response think>\n<fluency think>\n[Analysing Interactional Fluency and giving reasons for scoring.]\n</fluency think>\n<overall score>X</overall score>\n"}], "audios": ["/root/autodl-tmp/wavrewardDataset/conversations/data/testdata/predict_result_mission4/audios/duihua/duihua/005.wav"], "solution": 1}
+{"messages": [{"role": "user", "content": "<audio># Interactional Dialogue Evaluation\n\n**IMPORTANT**: Evaluation must include `<response think>` and `<fluency think>` analysis and `<overall score>` rating.\nListen to a two-person interactional dialogue speech (Dual-channel audio, with each channel representing one speaker), labeled as speakers A and B. Evaluate the quality of the interaction, focusing on:\n**Response Relevance:** \n**logical consistency, topic coherence**\n**Interactional Fluency:**\n**Detect and evaluate extended vocal overlaps, e.g., cross-channel overlap.**\n**Detect and evaluate long pauses, e.g., pauses more than 3s between speaker turns.\n\n****Note**: Small pauses and brief overlaps in audio are acceptable, while prolonged pauses and overlapping audio are harmful. You should consider Response Relevance and Interactional Fluency separately, and provide the corresponding thinking process.\n\n## Scoring Criteria\nAssign a single holistic score based on the combined evaluation:\n`1` (Poor): Significant issues in either  **Response Relevance ** or  **Interactional Fluency. **\n`2` (Excellent): Both **Response Relevance ** and  **Interactional Fluency ** are consistently appropriate and natural.\n## Evaluation Output Format:\nStrictly follow this template:\n<response think>\n[Analysing Response Relevance and giving reasons for scoring...]\n</response think>\n<fluency think>\n[Analysing Interactional Fluency and giving reasons for scoring.]\n</fluency think>\n<overall score>X</overall score>\n"}], "audios": ["/root/autodl-tmp/wavrewardDataset/conversations/data/testdata/predict_result_mission4/audios/duihua/duihua/006.wav"], "solution": 2}
+{"messages": [{"role": "user", "content": "<audio># Interactional Dialogue Evaluation\n\n**IMPORTANT**: Evaluation must include `<response think>` and `<fluency think>` analysis and `<overall score>` rating.\nListen to a two-person interactional dialogue speech (Dual-channel audio, with each channel representing one speaker), labeled as speakers A and B. Evaluate the quality of the interaction, focusing on:\n**Response Relevance:** \n**logical consistency, topic coherence**\n**Interactional Fluency:**\n**Detect and evaluate extended vocal overlaps, e.g., cross-channel overlap.**\n**Detect and evaluate long pauses, e.g., pauses more than 3s between speaker turns.\n\n****Note**: Small pauses and brief overlaps in audio are acceptable, while prolonged pauses and overlapping audio are harmful. You should consider Response Relevance and Interactional Fluency separately, and provide the corresponding thinking process.\n\n## Scoring Criteria\nAssign a single holistic score based on the combined evaluation:\n`1` (Poor): Significant issues in either  **Response Relevance ** or  **Interactional Fluency. **\n`2` (Excellent): Both **Response Relevance ** and  **Interactional Fluency ** are consistently appropriate and natural.\n## Evaluation Output Format:\nStrictly follow this template:\n<response think>\n[Analysing Response Relevance and giving reasons for scoring...]\n</response think>\n<fluency think>\n[Analysing Interactional Fluency and giving reasons for scoring.]\n</fluency think>\n<overall score>X</overall score>\n"}], "audios": ["/root/autodl-tmp/wavrewardDataset/conversations/data/testdata/predict_result_mission4/audios/duihua/duihua/007.wav"], "solution": 1}
+{"messages": [{"role": "user", "content": "<audio># Interactional Dialogue Evaluation\n\n**IMPORTANT**: Evaluation must include `<response think>` and `<fluency think>` analysis and `<overall score>` rating.\nListen to a two-person interactional dialogue speech (Dual-channel audio, with each channel representing one speaker), labeled as speakers A and B. Evaluate the quality of the interaction, focusing on:\n**Response Relevance:** \n**logical consistency, topic coherence**\n**Interactional Fluency:**\n**Detect and evaluate extended vocal overlaps, e.g., cross-channel overlap.**\n**Detect and evaluate long pauses, e.g., pauses more than 3s between speaker turns.\n\n****Note**: Small pauses and brief overlaps in audio are acceptable, while prolonged pauses and overlapping audio are harmful. You should consider Response Relevance and Interactional Fluency separately, and provide the corresponding thinking process.\n\n## Scoring Criteria\nAssign a single holistic score based on the combined evaluation:\n`1` (Poor): Significant issues in either  **Response Relevance ** or  **Interactional Fluency. **\n`2` (Excellent): Both **Response Relevance ** and  **Interactional Fluency ** are consistently appropriate and natural.\n## Evaluation Output Format:\nStrictly follow this template:\n<response think>\n[Analysing Response Relevance and giving reasons for scoring...]\n</response think>\n<fluency think>\n[Analysing Interactional Fluency and giving reasons for scoring.]\n</fluency think>\n<overall score>X</overall score>\n"}], "audios": ["/root/autodl-tmp/wavrewardDataset/conversations/data/testdata/predict_result_mission4/audios/duihua/duihua/008.wav"], "solution": 2}
+{"messages": [{"role": "user", "content": "<audio># Interactional Dialogue Evaluation\n\n**IMPORTANT**: Evaluation must include `<response think>` and `<fluency think>` analysis and `<overall score>` rating.\nListen to a two-person interactional dialogue speech (Dual-channel audio, with each channel representing one speaker), labeled as speakers A and B. Evaluate the quality of the interaction, focusing on:\n**Response Relevance:** \n**logical consistency, topic coherence**\n**Interactional Fluency:**\n**Detect and evaluate extended vocal overlaps, e.g., cross-channel overlap.**\n**Detect and evaluate long pauses, e.g., pauses more than 3s between speaker turns.\n\n****Note**: Small pauses and brief overlaps in audio are acceptable, while prolonged pauses and overlapping audio are harmful. You should consider Response Relevance and Interactional Fluency separately, and provide the corresponding thinking process.\n\n## Scoring Criteria\nAssign a single holistic score based on the combined evaluation:\n`1` (Poor): Significant issues in either  **Response Relevance ** or  **Interactional Fluency. **\n`2` (Excellent): Both **Response Relevance ** and  **Interactional Fluency ** are consistently appropriate and natural.\n## Evaluation Output Format:\nStrictly follow this template:\n<response think>\n[Analysing Response Relevance and giving reasons for scoring...]\n</response think>\n<fluency think>\n[Analysing Interactional Fluency and giving reasons for scoring.]\n</fluency think>\n<overall score>X</overall score>\n"}], "audios": ["/root/autodl-tmp/wavrewardDataset/conversations/data/testdata/predict_result_mission4/audios/duihua/duihua/009.wav"], "solution": 2}
+{"messages": [{"role": "user", "content": "<audio># Interactional Dialogue Evaluation\n\n**IMPORTANT**: Evaluation must include `<response think>` and `<fluency think>` analysis and `<overall score>` rating.\nListen to a two-person interactional dialogue speech (Dual-channel audio, with each channel representing one speaker), labeled as speakers A and B. Evaluate the quality of the interaction, focusing on:\n**Response Relevance:** \n**logical consistency, topic coherence**\n**Interactional Fluency:**\n**Detect and evaluate extended vocal overlaps, e.g., cross-channel overlap.**\n**Detect and evaluate long pauses, e.g., pauses more than 3s between speaker turns.\n\n****Note**: Small pauses and brief overlaps in audio are acceptable, while prolonged pauses and overlapping audio are harmful. You should consider Response Relevance and Interactional Fluency separately, and provide the corresponding thinking process.\n\n## Scoring Criteria\nAssign a single holistic score based on the combined evaluation:\n`1` (Poor): Significant issues in either  **Response Relevance ** or  **Interactional Fluency. **\n`2` (Excellent): Both **Response Relevance ** and  **Interactional Fluency ** are consistently appropriate and natural.\n## Evaluation Output Format:\nStrictly follow this template:\n<response think>\n[Analysing Response Relevance and giving reasons for scoring...]\n</response think>\n<fluency think>\n[Analysing Interactional Fluency and giving reasons for scoring.]\n</fluency think>\n<overall score>X</overall score>\n"}], "audios": ["/root/autodl-tmp/wavrewardDataset/conversations/data/testdata/predict_result_mission4/audios/duihua/duihua/010.wav"], "solution": 2}
+{"messages": [{"role": "user", "content": "<audio># Interactional Dialogue Evaluation\n\n**IMPORTANT**: Evaluation must include `<response think>` and `<fluency think>` analysis and `<overall score>` rating.\nListen to a two-person interactional dialogue speech (Dual-channel audio, with each channel representing one speaker), labeled as speakers A and B. Evaluate the quality of the interaction, focusing on:\n**Response Relevance:** \n**logical consistency, topic coherence**\n**Interactional Fluency:**\n**Detect and evaluate extended vocal overlaps, e.g., cross-channel overlap.**\n**Detect and evaluate long pauses, e.g., pauses more than 3s between speaker turns.\n\n****Note**: Small pauses and brief overlaps in audio are acceptable, while prolonged pauses and overlapping audio are harmful. You should consider Response Relevance and Interactional Fluency separately, and provide the corresponding thinking process.\n\n## Scoring Criteria\nAssign a single holistic score based on the combined evaluation:\n`1` (Poor): Significant issues in either  **Response Relevance ** or  **Interactional Fluency. **\n`2` (Excellent): Both **Response Relevance ** and  **Interactional Fluency ** are consistently appropriate and natural.\n## Evaluation Output Format:\nStrictly follow this template:\n<response think>\n[Analysing Response Relevance and giving reasons for scoring...]\n</response think>\n<fluency think>\n[Analysing Interactional Fluency and giving reasons for scoring.]\n</fluency think>\n<overall score>X</overall score>\n"}], "audios": ["/root/autodl-tmp/wavrewardDataset/conversations/data/testdata/predict_result_mission4/audios/duihua/duihua/011.wav"], "solution": 1}
+{"messages": [{"role": "user", "content": "<audio># Interactional Dialogue Evaluation\n\n**IMPORTANT**: Evaluation must include `<response think>` and `<fluency think>` analysis and `<overall score>` rating.\nListen to a two-person interactional dialogue speech (Dual-channel audio, with each channel representing one speaker), labeled as speakers A and B. Evaluate the quality of the interaction, focusing on:\n**Response Relevance:** \n**logical consistency, topic coherence**\n**Interactional Fluency:**\n**Detect and evaluate extended vocal overlaps, e.g., cross-channel overlap.**\n**Detect and evaluate long pauses, e.g., pauses more than 3s between speaker turns.\n\n****Note**: Small pauses and brief overlaps in audio are acceptable, while prolonged pauses and overlapping audio are harmful. You should consider Response Relevance and Interactional Fluency separately, and provide the corresponding thinking process.\n\n## Scoring Criteria\nAssign a single holistic score based on the combined evaluation:\n`1` (Poor): Significant issues in either  **Response Relevance ** or  **Interactional Fluency. **\n`2` (Excellent): Both **Response Relevance ** and  **Interactional Fluency ** are consistently appropriate and natural.\n## Evaluation Output Format:\nStrictly follow this template:\n<response think>\n[Analysing Response Relevance and giving reasons for scoring...]\n</response think>\n<fluency think>\n[Analysing Interactional Fluency and giving reasons for scoring.]\n</fluency think>\n<overall score>X</overall score>\n"}], "audios": ["/root/autodl-tmp/wavrewardDataset/conversations/data/testdata/predict_result_mission4/audios/duihua/duihua/012.wav"], "solution": 2}
+{"messages": [{"role": "user", "content": "<audio># Interactional Dialogue Evaluation\n\n**IMPORTANT**: Evaluation must include `<response think>` and `<fluency think>` analysis and `<overall score>` rating.\nListen to a two-person interactional dialogue speech (Dual-channel audio, with each channel representing one speaker), labeled as speakers A and B. Evaluate the quality of the interaction, focusing on:\n**Response Relevance:** \n**logical consistency, topic coherence**\n**Interactional Fluency:**\n**Detect and evaluate extended vocal overlaps, e.g., cross-channel overlap.**\n**Detect and evaluate long pauses, e.g., pauses more than 3s between speaker turns.\n\n****Note**: Small pauses and brief overlaps in audio are acceptable, while prolonged pauses and overlapping audio are harmful. You should consider Response Relevance and Interactional Fluency separately, and provide the corresponding thinking process.\n\n## Scoring Criteria\nAssign a single holistic score based on the combined evaluation:\n`1` (Poor): Significant issues in either  **Response Relevance ** or  **Interactional Fluency. **\n`2` (Excellent): Both **Response Relevance ** and  **Interactional Fluency ** are consistently appropriate and natural.\n## Evaluation Output Format:\nStrictly follow this template:\n<response think>\n[Analysing Response Relevance and giving reasons for scoring...]\n</response think>\n<fluency think>\n[Analysing Interactional Fluency and giving reasons for scoring.]\n</fluency think>\n<overall score>X</overall score>\n"}], "audios": ["/root/autodl-tmp/wavrewardDataset/conversations/data/testdata/predict_result_mission4/audios/duihua/duihua/013.wav"], "solution": 1}
+{"messages": [{"role": "user", "content": "<audio># Interactional Dialogue Evaluation\n\n**IMPORTANT**: Evaluation must include `<response think>` and `<fluency think>` analysis and `<overall score>` rating.\nListen to a two-person interactional dialogue speech (Dual-channel audio, with each channel representing one speaker), labeled as speakers A and B. Evaluate the quality of the interaction, focusing on:\n**Response Relevance:** \n**logical consistency, topic coherence**\n**Interactional Fluency:**\n**Detect and evaluate extended vocal overlaps, e.g., cross-channel overlap.**\n**Detect and evaluate long pauses, e.g., pauses more than 3s between speaker turns.\n\n****Note**: Small pauses and brief overlaps in audio are acceptable, while prolonged pauses and overlapping audio are harmful. You should consider Response Relevance and Interactional Fluency separately, and provide the corresponding thinking process.\n\n## Scoring Criteria\nAssign a single holistic score based on the combined evaluation:\n`1` (Poor): Significant issues in either  **Response Relevance ** or  **Interactional Fluency. **\n`2` (Excellent): Both **Response Relevance ** and  **Interactional Fluency ** are consistently appropriate and natural.\n## Evaluation Output Format:\nStrictly follow this template:\n<response think>\n[Analysing Response Relevance and giving reasons for scoring...]\n</response think>\n<fluency think>\n[Analysing Interactional Fluency and giving reasons for scoring.]\n</fluency think>\n<overall score>X</overall score>\n"}], "audios": ["/root/autodl-tmp/wavrewardDataset/conversations/data/testdata/predict_result_mission4/audios/duihua/duihua/014.wav"], "solution": 1}
+{"messages": [{"role": "user", "content": "<audio># Interactional Dialogue Evaluation\n\n**IMPORTANT**: Evaluation must include `<response think>` and `<fluency think>` analysis and `<overall score>` rating.\nListen to a two-person interactional dialogue speech (Dual-channel audio, with each channel representing one speaker), labeled as speakers A and B. Evaluate the quality of the interaction, focusing on:\n**Response Relevance:** \n**logical consistency, topic coherence**\n**Interactional Fluency:**\n**Detect and evaluate extended vocal overlaps, e.g., cross-channel overlap.**\n**Detect and evaluate long pauses, e.g., pauses more than 3s between speaker turns.\n\n****Note**: Small pauses and brief overlaps in audio are acceptable, while prolonged pauses and overlapping audio are harmful. You should consider Response Relevance and Interactional Fluency separately, and provide the corresponding thinking process.\n\n## Scoring Criteria\nAssign a single holistic score based on the combined evaluation:\n`1` (Poor): Significant issues in either  **Response Relevance ** or  **Interactional Fluency. **\n`2` (Excellent): Both **Response Relevance ** and  **Interactional Fluency ** are consistently appropriate and natural.\n## Evaluation Output Format:\nStrictly follow this template:\n<response think>\n[Analysing Response Relevance and giving reasons for scoring...]\n</response think>\n<fluency think>\n[Analysing Interactional Fluency and giving reasons for scoring.]\n</fluency think>\n<overall score>X</overall score>\n"}], "audios": ["/root/autodl-tmp/wavrewardDataset/conversations/data/testdata/predict_result_mission4/audios/duihua/duihua/015.wav"], "solution": 1}
+{"messages": [{"role": "user", "content": "<audio># Interactional Dialogue Evaluation\n\n**IMPORTANT**: Evaluation must include `<response think>` and `<fluency think>` analysis and `<overall score>` rating.\nListen to a two-person interactional dialogue speech (Dual-channel audio, with each channel representing one speaker), labeled as speakers A and B. Evaluate the quality of the interaction, focusing on:\n**Response Relevance:** \n**logical consistency, topic coherence**\n**Interactional Fluency:**\n**Detect and evaluate extended vocal overlaps, e.g., cross-channel overlap.**\n**Detect and evaluate long pauses, e.g., pauses more than 3s between speaker turns.\n\n****Note**: Small pauses and brief overlaps in audio are acceptable, while prolonged pauses and overlapping audio are harmful. You should consider Response Relevance and Interactional Fluency separately, and provide the corresponding thinking process.\n\n## Scoring Criteria\nAssign a single holistic score based on the combined evaluation:\n`1` (Poor): Significant issues in either  **Response Relevance ** or  **Interactional Fluency. **\n`2` (Excellent): Both **Response Relevance ** and  **Interactional Fluency ** are consistently appropriate and natural.\n## Evaluation Output Format:\nStrictly follow this template:\n<response think>\n[Analysing Response Relevance and giving reasons for scoring...]\n</response think>\n<fluency think>\n[Analysing Interactional Fluency and giving reasons for scoring.]\n</fluency think>\n<overall score>X</overall score>\n"}], "audios": ["/root/autodl-tmp/wavrewardDataset/conversations/data/testdata/predict_result_mission4/audios/duihua/duihua/016.wav"], "solution": 1}
+{"messages": [{"role": "user", "content": "<audio># Interactional Dialogue Evaluation\n\n**IMPORTANT**: Evaluation must include `<response think>` and `<fluency think>` analysis and `<overall score>` rating.\nListen to a two-person interactional dialogue speech (Dual-channel audio, with each channel representing one speaker), labeled as speakers A and B. Evaluate the quality of the interaction, focusing on:\n**Response Relevance:** \n**logical consistency, topic coherence**\n**Interactional Fluency:**\n**Detect and evaluate extended vocal overlaps, e.g., cross-channel overlap.**\n**Detect and evaluate long pauses, e.g., pauses more than 3s between speaker turns.\n\n****Note**: Small pauses and brief overlaps in audio are acceptable, while prolonged pauses and overlapping audio are harmful. You should consider Response Relevance and Interactional Fluency separately, and provide the corresponding thinking process.\n\n## Scoring Criteria\nAssign a single holistic score based on the combined evaluation:\n`1` (Poor): Significant issues in either  **Response Relevance ** or  **Interactional Fluency. **\n`2` (Excellent): Both **Response Relevance ** and  **Interactional Fluency ** are consistently appropriate and natural.\n## Evaluation Output Format:\nStrictly follow this template:\n<response think>\n[Analysing Response Relevance and giving reasons for scoring...]\n</response think>\n<fluency think>\n[Analysing Interactional Fluency and giving reasons for scoring.]\n</fluency think>\n<overall score>X</overall score>\n"}], "audios": ["/root/autodl-tmp/wavrewardDataset/conversations/data/testdata/predict_result_mission4/audios/duihua/duihua/017.wav"], "solution": 2}

.ipynb_checkpoints/checkMissing-checkpoint.py ADDED Viewed

	@@ -0,0 +1,86 @@

+import json
+import torchaudio
+from tqdm import tqdm
+import os
+import sys
+from collections import defaultdict
+def validate_jsonl_audios(jsonl_path):
+    """验证JSONL文件中所有音频文件的完整性"""
+    stats = defaultdict(int)
+    error_log = []
+    valid_samples = 0
+    # 第一次遍历：统计总行数（用于进度条）
+    with open(jsonl_path, 'r') as f:
+        total_lines = sum(1 for _ in f)
+    # 第二次遍历：实际验证
+    with open(jsonl_path, 'r') as f:
+        for line_num, line in enumerate(tqdm(f, total=total_lines, desc="验证进度", unit="line")):
+            try:
+                data = json.loads(line.strip())
+                if 'audios' not in data or not data['audios']:
+                    stats['no_audio_field'] += 1
+                    continue
+                for audio_path in data['audios']:
+                    # 检查文件是否存在
+                    if not os.path.exists(audio_path):
+                        stats['missing'] += 1
+                        error_log.append(f"[行{line_num+1}] 缺失文件: {audio_path}")
+                        continue
+                    # 检查文件大小
+                    if os.path.getsize(audio_path) == 0:
+                        stats['zero_size'] += 1
+                        error_log.append(f"[行{line_num+1}] 空文件: {audio_path}")
+                        continue
+                    # 验证音频内容
+                    try:
+                        waveform, sr = torchaudio.load(audio_path)
+                        if waveform.numel() == 0:
+                            stats['empty_audio'] += 1
+                            error_log.append(f"[行{line_num+1}] 空音频: {audio_path}")
+                        elif sr not in [8000, 16000, 22050, 44100, 48000]:
+                            stats['abnormal_sr'] += 1
+                            error_log.append(f"[行{line_num+1}] 异常采样率({sr}Hz): {audio_path}")
+                        else:
+                            stats['valid'] += 1
+                    except Exception as e:
+                        stats['corrupted'] += 1
+                        error_type = str(e).split('(')[0]
+                        error_log.append(f"[行{line_num+1}] 损坏文件({error_type}): {audio_path}")
+                valid_samples += 1
+            except json.JSONDecodeError:
+                stats['invalid_json'] += 1
+                error_log.append(f"[行{line_num+1}] 无效JSON格式")
+    # 打印统计报告
+    print("\n===== 验证报告 =====")
+    print(f"总行数: {total_lines}")
+    print(f"有效样本: {valid_samples}")
+    print("--- 问题统计 ---")
+    for k, v in sorted(stats.items()):
+        print(f"{k}: {v}")
+    # 保存错误日志
+    if error_log:
+        log_file = f"{os.path.splitext(jsonl_path)[0]}_audio_errors.log"
+        with open(log_file, 'w') as f:
+            f.write("\n".join(error_log))
+        print(f"\n发现 {len(error_log)} 个问题，已保存到 {log_file}")
+if __name__ == "__main__":
+    if len(sys.argv) != 2:
+        print("使用方法: python validate_audio_jsonl.py <input.jsonl>")
+        sys.exit(1)
+    if not os.path.exists(sys.argv[1]):
+        print(f"错误: 文件 {sys.argv[1]} 不存在")
+        sys.exit(1)
+    validate_jsonl_audios(sys.argv[1])

.ipynb_checkpoints/cotSFT_gemini-checkpoint.json ADDED Viewed

The diff for this file is too large to render. See raw diff

.ipynb_checkpoints/count_audios-checkpoint.py ADDED Viewed

	@@ -0,0 +1,69 @@

+import json
+import os
+from collections import Counter
+from pathlib import Path
+def collect_unique_audio_paths(json_file_path):
+    """
+    提取 JSONL 文件中所有不重复的 audios 路径
+    """
+    audio_set = set()
+    with open(json_file_path, 'r', encoding='utf-8') as f:
+        for line_num, line in enumerate(f, 1):
+            line = line.strip()
+            if not line:
+                continue
+            try:
+                data = json.loads(line)
+                if isinstance(data, dict) and 'audios' in data and data['audios']:
+                    for audio_path in data['audios']:
+                        audio_set.add(audio_path)
+            except Exception as e:
+                print(f"第 {line_num} 行处理错误: {e}")
+    return audio_set
+def extract_first_subfolder_after_data(audio_path):
+    """
+    提取 audio_path 中 'data/' 后的第一级子文件夹名称
+    例如：
+    /.../data/output_xxx/yyy/file.wav → 返回 output_xxx
+    """
+    try:
+        path = Path(audio_path)
+        parts = path.parts
+        if "wavrewardDataset" in parts:
+            data_idx = parts.index("wavrewardDataset")
+            if data_idx + 1 < len(parts):
+                return parts[data_idx + 1]
+        return "unknown"
+    except Exception as e:
+        print(f"路径解析错误: {audio_path}, 错误: {e}")
+        return "error"
+def main():
+    json_file = "all_dataset_train_resampled_16000.jsonl"
+    if not os.path.exists(json_file):
+        print(f"文件 {json_file} 不存在")
+        return
+    print(f"正在处理文件: {json_file}")
+    print("=" * 50)
+    # 步骤 1：收集去重后的音频路径
+    unique_audio_paths = collect_unique_audio_paths(json_file)
+    print(f"不重复音频文件数: {len(unique_audio_paths)}")
+    # 步骤 2：按 data 后的一级子目录统计
+    folder_counter = Counter()
+    for audio_path in unique_audio_paths:
+        first_subfolder = extract_first_subfolder_after_data(audio_path)
+        folder_counter[first_subfolder] += 1
+    print("\n按 data 后一级子文件夹统计（基于去重后的路径）:")
+    print("-" * 50)
+    for folder, count in sorted(folder_counter.items(), key=lambda x: -x[1]):
+        print(f"{folder}: {count} 个文件")
+if __name__ == "__main__":
+    main()

.ipynb_checkpoints/filter_duration-checkpoint.py ADDED Viewed

	@@ -0,0 +1,129 @@

+import json
+import torchaudio
+from tqdm import tqdm
+import os
+import sys
+from collections import defaultdict
+def filter_audio_duration(jsonl_path, max_duration=90.0):
+    """筛选JSONL文件中时长在指定秒数以下的音频文件"""
+    stats = defaultdict(int)
+    filtered_data = []
+    error_log = []
+    # 第一次遍历：统计总行数（用于进度条）
+    with open(jsonl_path, 'r') as f:
+        total_lines = sum(1 for _ in f)
+    # 第二次遍历：实际筛选
+    with open(jsonl_path, 'r') as f:
+        for line_num, line in enumerate(tqdm(f, total=total_lines, desc="筛选进度", unit="line")):
+            try:
+                data = json.loads(line.strip())
+                if 'audios' not in data or not data['audios']:
+                    stats['no_audio_field'] += 1
+                    continue
+                valid_audios = []
+                all_audios_valid = True
+                for audio_path in data['audios']:
+                    # 检查文件是否存在
+                    if not os.path.exists(audio_path):
+                        stats['missing'] += 1
+                        error_log.append(f"[行{line_num+1}] 缺失文件: {audio_path}")
+                        all_audios_valid = False
+                        continue
+                    # 检查文件大小
+                    if os.path.getsize(audio_path) == 0:
+                        stats['zero_size'] += 1
+                        error_log.append(f"[行{line_num+1}] 空文件: {audio_path}")
+                        all_audios_valid = False
+                        continue
+                    # 验证音频内容和时长
+                    try:
+                        waveform, sr = torchaudio.load(audio_path)
+                        if waveform.numel() == 0:
+                            stats['empty_audio'] += 1
+                            error_log.append(f"[行{line_num+1}] 空音频: {audio_path}")
+                            all_audios_valid = False
+                            continue
+                        # 计算音频时长（秒）
+                        duration = waveform.shape[1] / sr
+                        if duration > max_duration:
+                            stats['too_long'] += 1
+                            error_log.append(f"[行{line_num+1}] 时长过长({duration:.2f}s): {audio_path}")
+                            all_audios_valid = False
+                            continue
+                        else:
+                            stats['valid'] += 1
+                            valid_audios.append(audio_path)
+                    except Exception as e:
+                        stats['corrupted'] += 1
+                        error_type = str(e).split('(')[0]
+                        error_log.append(f"[行{line_num+1}] 损坏文件({error_type}): {audio_path}")
+                        all_audios_valid = False
+                        continue
+                # 如果所有音频都有效且时长符合要求，保留这个样本
+                if all_audios_valid and valid_audios:
+                    # 更新audios字段为筛选后的音频列表
+                    data['audios'] = valid_audios
+                    filtered_data.append(data)
+                    stats['kept'] += 1
+                else:
+                    stats['filtered_out'] += 1
+            except json.JSONDecodeError:
+                stats['invalid_json'] += 1
+                error_log.append(f"[行{line_num+1}] 无效JSON格式")
+    # 保存筛选后的数据
+    output_path = f"{os.path.splitext(jsonl_path)[0]}_filtered_{max_duration}s.jsonl"
+    with open(output_path, 'w', encoding='utf-8') as f:
+        for data in filtered_data:
+            f.write(json.dumps(data, ensure_ascii=False) + '\n')
+    # 打印统计报告
+    print("\n===== 筛选报告 =====")
+    print(f"最大时长限制: {max_duration}秒")
+    print(f"总行数: {total_lines}")
+    print(f"保留样本: {stats['kept']}")
+    print(f"过滤样本: {stats['filtered_out']}")
+    print("--- 详细统计 ---")
+    for k, v in sorted(stats.items()):
+        print(f"{k}: {v}")
+    # 保存错误日志
+    if error_log:
+        log_file = f"{os.path.splitext(jsonl_path)[0]}_duration_filter_errors.log"
+        with open(log_file, 'w', encoding='utf-8') as f:
+            f.write("\n".join(error_log))
+        print(f"\n发现 {len(error_log)} 个问题，已保存到 {log_file}")
+    print(f"\n筛选后的数据已保存到: {output_path}")
+if __name__ == "__main__":
+    if len(sys.argv) < 2:
+        print("使用方法: python filter_duration.py <input.jsonl> [max_duration]")
+        print("默认最大时长: 100秒")
+        sys.exit(1)
+    if not os.path.exists(sys.argv[1]):
+        print(f"错误: 文件 {sys.argv[1]} 不存在")
+        sys.exit(1)
+    max_duration = 90.0
+    if len(sys.argv) >= 3:
+        try:
+            max_duration = float(sys.argv[2])
+        except ValueError:
+            print(f"错误: 无效的时长参数 {sys.argv[2]}")
+            sys.exit(1)
+    filter_audio_duration(sys.argv[1], max_duration)

.ipynb_checkpoints/gen_data-checkpoint.py ADDED Viewed

	@@ -0,0 +1,206 @@

+import json
+import os
+import random
+def get_prompt_for_file(filename):
+    if 'overlapgap' in filename:
+        return overlap_prompt
+    elif 'silencegap' in filename:
+        return silence_prompt
+    elif 'speaker' in filename:
+        return speaker_prompt
+    elif 'transcription' in filename:
+        return transcript_prompt
+    else:
+        return None
+output_path = "/root/autodl-tmp/ms-swift/dataset_newCOTSFT1.jsonl"
+# with open(input_path, "r") as fin:
+#     input_data = json.load(fin)
+www = "hello"
+www = (
+        "# Dialogue Response Evaluation\n\n"
+        "**IMPORTANT:** Evaluation must include`<score>` rating.\n\n"
+        "Listen to the dialogue recording (two sentences, 1-second pause in between). Evaluate the quality of the **second sentence** as a response to the first, focusing on **text relevance** and the **appropriateness** of **Linguistic information (a range of paralinguistic information such as emotion/age/pitch/speed/volume)**.\n"
+        "**Note:** Focus on evaluating the appropriateness of the second sentence relative to the first, even if the first sentence itself contains contradictory information.\n\n"
+        "## Scoring Criteria\n\n"
+        "**1 points**: Text content is irrelevant or incorrect or illogical.(low intelligence)\n"
+        "**3 points**: Text is relevant, but paralinguistic information is **inappropriate** for the context.(low emotional quotient)\n"
+        "**5 points**: Text is relevant, and paralinguistic information is **appropriate** for the context, resulting in effective communication.(High intelligence and emotional intelligence.)\n\n"
+        "## Evaluation Requirements\n\n"
+        "Response **MUST** follow this format:\n\n"
+        "<score>X</score> (**X is 1, 3, or 5**)\n\n")
+# www = (
+#     "# Interactional Dialogue Evaluation\n\n"
+#     "**IMPORTANT**: Evaluation must include `<response think>` and `<fluency think>` analysis and `<overall score>` rating.\n"
+#     "Listen to a two-person interactional dialogue speech (Dual-channel audio, with each channel representing one speaker), labeled as speakers A and B. Evaluate the quality of the interaction, focusing on:\n"
+#     "**Response Relevance:** \n"
+#     "**logical consistency, topic coherence**\n"
+#     "**Interactional Fluency:**\n"
+#     "**Strictly detect dual-tracked vocal overlap >3s (cross-channel analysis)**\n"
+#     "**Pauses >5s between turns (must evaluate) \n\n**"
+#     "**Note**: Small pauses and brief overlaps in audio are acceptable, while prolonged pauses and overlapping audio are harmful. You should consider Response Relevance and Interactional Fluency separately, and provide the corresponding thinking process.\n\n"
+#     "## Scoring Criteria\n"
+#     "Assign a single holistic score based on the combined evaluation:\n"
+#     "`1` (Poor): Significant issues in either  **Response Relevance ** or  **Interactional Fluency. **\n"
+#     "`2` (Excellent): Both **Response Relevance ** and  **Interactional Fluency ** are consistently appropriate and natural.\n"
+#     "## Evaluation Output Format:\n"
+#     "Strictly follow this template:\n"
+#     "<response think>\n"
+#     "[Analysing Response Relevance and giving reasons for scoring...]\n"
+#     "</response think>\n"
+#     "<fluency think>\n"
+#     "[Analysing Interactional Fluency and giving reasons for scoring.]\n"
+#     "</fluency think>\n"
+#     "<overall score>X</overall score>\n"
+# )
+# www = (
+#     "# Interactional Dialogue Evaluation\n\n"
+#     "**IMPORTANT**: Evaluation must include `<response think>` and `<fluency think>` analysis and `<overall score>` rating.\n"
+#     "Listen to a two-person interactional dialogue speech (Dual-channel audio, with each channel representing one speaker), labeled as speakers A and B. Evaluate the quality of the interaction, focusing on:\n"
+#     "**Response Relevance:** \n"
+#     "**logical consistency, topic coherence**\n"
+#     "**Interactional Fluency:**\n"
+#     "**Strictly detect dual-tracked vocal overlap >3s (cross-channel analysis)**\n"
+#     "**Pauses >5s between turns (must evaluate) \n\n**"
+#     "**Note**: Small pauses and brief overlaps in audio are acceptable, while prolonged pauses and overlapping audio are harmful. You should consider Response Relevance and Interactional Fluency separately, and provide the corresponding thinking process.\n\n"
+#     "## Scoring Criteria\n"
+#     "Assign a single holistic score based on the combined evaluation:\n"
+#     "`1` (Poor): Significant issues in either  **Response Relevance ** or  **Interactional Fluency. **\n"
+#     "`2` (Excellent): Both **Response Relevance ** and  **Interactional Fluency ** are consistently appropriate and natural.\n"
+#     "## Evaluation Output Format:\n"
+#     "Strictly follow this template:\n"
+#     "<response think>\n"
+#     "[Analysing Response Relevance and giving reasons for scoring...]\n"
+#     "</response think>\n"
+#     "<fluency think>\n"
+#     "[Analysing Interactional Fluency and giving reasons for scoring.]\n"
+#     "</fluency think>\n"
+#     "<overall score>X</overall score>\n"
+# )
+overlap_prompt = (
+    "Analyze the dual-channel audio and identify segments where multiple speakers are talking simultaneously for more than 3 seconds. \n"
+    "Simply tell me when the overlap starts and ends in MM:SS format. \n"
+    "Just one simple sentence about the overlap timing. Keep the word count within 40 words."
+)
+overlap_prompt_yes = (
+    "Analyze the dual-channel audio and identify segments where multiple speakers are talking simultaneously for more than 3 seconds. \n"
+    "Simply tell me whether the audio has overlap segments. \n"
+    "Just one simple sentence about the overlap segments. Keep the word count within 40 words."
+)
+silence_prompt = (
+    "Analyze the dual-channel audio and identify segments where multiple speakers are silent for more than 3 seconds. \n"
+    "Simply tell me when the silence starts and ends in MM:SS format. \n"
+    "Just one simple sentence about the silence timing. Keep the word count within 40 words."
+)
+speaker_prompt = (
+    "Analyze the dual-channel audio and detect individual speakers. \n"
+    "List the speaking segments for each speaker in MM:SS-MM:SS format. \n"
+    "Only output speaker labels and time segments in a similar format. \n"
+)
+transcript_prompt = (
+    "Analyze the dual-channel audio and transcript each speaker's sentences with timestamps. \n"
+    "List the speaking segments and transcript text for each speaker in MM:SS-MM:SS format. \n"
+    "Only output time segments, speaker labels, and transcript text in a similar format.\n"
+)
+# Process files in the silence_overlaps directory
+input_dir = "/root/autodl-tmp/ms-swift/input"
+all_data = []
+prompt_template = (
+    "# Interactional Dialogue Evaluation\n\n"
+    "**IMPORTANT**: Evaluation must include `<response think>` and `<fluency think>` analysis and `<overall score>` rating.\n"
+    "Listen to a two-person interactional dialogue speech (Dual-channel audio, with each channel representing one speaker), labeled as speakers A and B. Evaluate the quality of the interaction, focusing on:\n"
+    "**Response Relevance:** \n"
+    "**logical consistency, topic coherence**\n"
+    "**Interactional Fluency:**\n"
+    "**Detect and evaluate extended vocal overlaps, e.g., cross-channel overlap.**\n"
+    "**Detect and evaluate long pauses, e.g., pauses more than 3s between speaker turns.\n\n**"
+    "**Note**: Small pauses and brief overlaps in audio are acceptable, while prolonged pauses and overlapping audio are harmful. You should consider Response Relevance and Interactional Fluency separately, and provide the corresponding thinking process.\n\n"
+    "## Scoring Criteria\n"
+    "Assign a single holistic score based on the combined evaluation:\n"
+    "`1` (Poor): Significant issues in either  **Response Relevance ** or  **Interactional Fluency. **\n"
+    "`2` (Excellent): Both **Response Relevance ** and  **Interactional Fluency ** are consistently appropriate and natural.\n"
+    "## Evaluation Output Format:\n"
+    "Strictly follow this template:\n"
+    "<response think>\n"
+    "[Analysing Response Relevance and giving reasons for scoring...]\n"
+    "</response think>\n"
+    "<fluency think>\n"
+    "[Analysing Interactional Fluency and giving reasons for scoring.]\n"
+    "</fluency think>\n"
+    "<overall score>X</overall score>\n"
+)
+# prompt_template4Job = (
+#     "# Interactional Dialogue Evaluation\n\n"
+#     "**IMPORTANT**: Evaluation must include `<response think>` and `<fluency think>` analysis and `<overall score>` rating.\n"
+#     "Listen to a two-person interactional dialogue speech (Dual-channel audio, with each channel representing one speaker), labeled as speakers A and B. Evaluate the quality of the interaction, focusing on:\n"
+#     "**Response Relevance:** \n"
+#     "**logical consistency, topic coherence**\n"
+#     "**Interactional Fluency:**\n"
+#     "**Detect and evaluate extended vocal overlaps, e.g., cross-channel overlap.**\n"
+#     "**Detect and evaluate long pauses, e.g., pauses more than 3s between speaker turns.\n\n**"
+#     "**Note**: Small pauses and brief overlaps in audio are acceptable, while prolonged pauses and overlapping audio are harmful. You should consider Response Relevance and Interactional Fluency separately, and provide the corresponding thinking process.\n\n"
+#     "## Error Type\n"
+#     "Assign a single holistic score based on the combined evaluation:\n"
+#     "`1` (Correct): Responses are logically consistent and relevant.  No problematic overlaps or silences.\n"
+#     "`2` (Detect overlap): Extended or frequent overlapping speech is present.\n"
+#     "`3` (Detect silence): Long pauses (>3 seconds) occur between turns.\n"
+#     "`4` (Detect text error): Responses are off-topic or disconnected.\n"
+#     "## Evaluation Output Format:\n"
+#     "Strictly follow this template:\n"
+#     "<response think>\n"
+#     "[Analysing Response Relevance and giving reasons for scoring...]\n"
+#     "</response think>\n"
+#     "<fluency think>\n"
+#     "[Analysing Interactional Fluency and giving reasons for scoring.]\n"
+#     "</fluency think>\n"
+#     "<error type>X</error type>\n"
+# )
+# Process each file
+for filename in os.listdir(input_dir):
+    input_path = os.path.join(input_dir, filename)
+    if not os.path.isfile(input_path):
+        continue
+    # Get the appropriate prompt for this file
+    prompt = prompt_template
+    if prompt is None:
+        print(f"Skipping {filename} - no matching prompt found")
+        continue
+    # Read input data
+    with open(input_path, "r") as fin:
+        input_data = json.load(fin)
+        # Process each item
+        for item in input_data:
+            data = {
+                "messages": [
+                    {"role": "user", "content": f"<audio>{prompt}"},
+                    {"role": "assistant", "content": item["model_output"]}
+                ],
+                "audios": [
+                    item["key"] + "/stereo_dialogue_with_laugh.wav"
+                ],
+                "solution": 2
+            }
+            all_data.append(data)
+random.shuffle(all_data)
+# Write all processed data to a single output file
+with open(output_path, "w", encoding="utf-8") as fout:
+    for data in all_data:
+        json.dump(data, fout, ensure_ascii=False)
+        fout.write('\n')

.ipynb_checkpoints/intersect_jsonl-checkpoint.py ADDED Viewed

	@@ -0,0 +1,39 @@

+import json
+import sys
+import os
+from tqdm import tqdm
+def load_jsonl_set(path):
+    """加载jsonl文件，返回以audios字段为key的dict"""
+    data_dict = {}
+    with open(path, 'r', encoding='utf-8') as f:
+        for line in f:
+            try:
+                item = json.loads(line.strip())
+                # 用tuple(audios)做key，保证唯一性
+                key = tuple(item.get('audios', []))
+                data_dict[key] = item
+            except Exception as e:
+                continue
+    return data_dict
+def main(file1, file2, output_path):
+    dict1 = load_jsonl_set(file1)
+    dict2 = load_jsonl_set(file2)
+    # 取交集
+    common_keys = set(dict1.keys()) & set(dict2.keys())
+    print(f"交集样本数: {len(common_keys)}")
+    with open(output_path, 'w', encoding='utf-8') as out:
+        for key in tqdm(common_keys, desc="写入交集"):
+            # 以file1的内容为准
+            out.write(json.dumps(dict1[key], ensure_ascii=False) + '\n')
+    print(f"交集已保存到: {output_path}")
+if __name__ == "__main__":
+    if len(sys.argv) != 4:
+        print("用法: python intersect_jsonl.py file1.jsonl file2.jsonl output.jsonl")
+        sys.exit(1)
+    main(sys.argv[1], sys.argv[2], sys.argv[3])

.ipynb_checkpoints/maketar-checkpoint.py ADDED Viewed

	@@ -0,0 +1,35 @@

+import json
+import tarfile
+import os
+jsonl_path = 'dataset_4JOB.jsonl'  # 输入 JSONL 文件路径
+output_tar_path = '4JOB_train.tar'  # 输出 tar 文件名
+keep_directory_structure = False  # 设置为 True 会保留原始路径结构；False 则只保留文件名
+def collect_audio_paths(jsonl_path):
+    audio_paths = set()
+    with open(jsonl_path, 'r', encoding='utf-8') as f:
+        for line in f:
+            data = json.loads(line)
+            audios = data.get('audios', [])
+            for audio_path in audios:
+                audio_paths.add(audio_path)
+    return list(audio_paths)
+def add_files_to_tar(tar_path, file_paths, keep_structure=False):
+    with tarfile.open(tar_path, 'w') as tar:
+        for path in file_paths:
+            if not os.path.isfile(path):
+                print(f"Warning: File not found - {path}")
+                continue
+            arcname = path if keep_structure else os.path.basename(path)
+            tar.add(path, arcname=arcname)
+def main():
+    audio_paths = collect_audio_paths(jsonl_path)
+    print(f"Collected {len(audio_paths)} unique audio files.")
+    add_files_to_tar(output_tar_path, audio_paths, keep_structure=keep_directory_structure)
+    print(f"TAR file created: {output_tar_path}")
+if __name__ == '__main__':
+    main()

.ipynb_checkpoints/merge_lora-checkpoint.sh ADDED Viewed

	@@ -0,0 +1,5 @@

+# Since `output/vx-xxx/checkpoint-xxx` is trained by swift and contains an `args.json` file,
+# there is no need to explicitly set `--model`, `--system`, etc., as they will be automatically read.
+swift export \
+    --adapters /root/autodl-tmp/output_7B_Lora_cotSFT/v11-20250619-234421/checkpoint-348 \
+    --merge_lora true

.ipynb_checkpoints/requirements-checkpoint.txt ADDED Viewed

	@@ -0,0 +1 @@


1	+ -r requirements/framework.txt

.ipynb_checkpoints/setup-checkpoint.py ADDED Viewed

	@@ -0,0 +1,165 @@

+# Copyright (c) Alibaba, Inc. and its affiliates.
+# !/usr/bin/env python
+import os
+from setuptools import find_packages, setup
+from typing import List
+def readme():
+    with open('README.md', encoding='utf-8') as f:
+        content = f.read()
+    return content
+version_file = 'swift/version.py'
+def get_version():
+    with open(version_file, 'r', encoding='utf-8') as f:
+        exec(compile(f.read(), version_file, 'exec'))
+    return locals()['__version__']
+def parse_requirements(fname='requirements.txt', with_version=True):
+    """
+    Parse the package dependencies listed in a requirements file but strips
+    specific versioning information.
+    Args:
+        fname (str): path to requirements file
+        with_version (bool, default=False): if True include version specs
+    Returns:
+        List[str]: list of requirements items
+    CommandLine:
+        python -c "import setup; print(setup.parse_requirements())"
+    """
+    import re
+    import sys
+    from os.path import exists
+    require_fpath = fname
+    def parse_line(line):
+        """
+        Parse information from a line in a requirements text file
+        """
+        if line.startswith('-r '):
+            # Allow specifying requirements in other files
+            target = line.split(' ')[1]
+            relative_base = os.path.dirname(fname)
+            absolute_target = os.path.join(relative_base, target)
+            for info in parse_require_file(absolute_target):
+                yield info
+        else:
+            info = {'line': line}
+            if line.startswith('-e '):
+                info['package'] = line.split('#egg=')[1]
+            else:
+                # Remove versioning from the package
+                pat = '(' + '|'.join(['>=', '==', '>']) + ')'
+                parts = re.split(pat, line, maxsplit=1)
+                parts = [p.strip() for p in parts]
+                info['package'] = parts[0]
+                if len(parts) > 1:
+                    op, rest = parts[1:]
+                    if ';' in rest:
+                        # Handle platform specific dependencies
+                        # http://setuptools.readthedocs.io/en/latest/setuptools.html#declaring-platform-specific-dependencies
+                        version, platform_deps = map(str.strip, rest.split(';'))
+                        info['platform_deps'] = platform_deps
+                    else:
+                        version = rest  # NOQA
+                    info['version'] = (op, version)
+            yield info
+    def parse_require_file(fpath):
+        with open(fpath, 'r', encoding='utf-8') as f:
+            for line in f.readlines():
+                line = line.strip()
+                if line.startswith('http'):
+                    print('skip http requirements %s' % line)
+                    continue
+                if line and not line.startswith('#') and not line.startswith('--'):
+                    for info in parse_line(line):
+                        yield info
+                elif line and line.startswith('--find-links'):
+                    eles = line.split()
+                    for e in eles:
+                        e = e.strip()
+                        if 'http' in e:
+                            info = dict(dependency_links=e)
+                            yield info
+    def gen_packages_items():
+        items = []
+        deps_link = []
+        if exists(require_fpath):
+            for info in parse_require_file(require_fpath):
+                if 'dependency_links' not in info:
+                    parts = [info['package']]
+                    if with_version and 'version' in info:
+                        parts.extend(info['version'])
+                    if not sys.version.startswith('3.4'):
+                        # apparently package_deps are broken in 3.4
+                        platform_deps = info.get('platform_deps')
+                        if platform_deps is not None:
+                            parts.append(';' + platform_deps)
+                    item = ''.join(parts)
+                    items.append(item)
+                else:
+                    deps_link.append(info['dependency_links'])
+        return items, deps_link
+    return gen_packages_items()
+if __name__ == '__main__':
+    install_requires, deps_link = parse_requirements('requirements.txt')
+    extra_requires = {}
+    all_requires = []
+    extra_requires['eval'], _ = parse_requirements('requirements/eval.txt')
+    extra_requires['swanlab'], _ = parse_requirements('requirements/swanlab.txt')
+    extra_requires['seq_parallel'], _ = parse_requirements('requirements/seq_parallel.txt')
+    all_requires.extend(install_requires)
+    all_requires.extend(extra_requires['eval'])
+    all_requires.extend(extra_requires['seq_parallel'])
+    all_requires.extend(extra_requires['swanlab'])
+    extra_requires['all'] = all_requires
+    setup(
+        name='ms_swift',
+        version=get_version(),
+        description='Swift: Scalable lightWeight Infrastructure for Fine-Tuning',
+        long_description=readme(),
+        long_description_content_type='text/markdown',
+        author='DAMO ModelScope teams',
+        author_email='contact@modelscope.cn',
+        keywords='python, petl, efficient tuners',
+        url='https://github.com/modelscope/swift',
+        packages=find_packages(exclude=('configs', 'demo')),
+        include_package_data=True,
+        package_data={
+            '': ['*.h', '*.cpp', '*.cu'],
+        },
+        classifiers=[
+            'Development Status :: 4 - Beta',
+            'License :: OSI Approved :: Apache Software License',
+            'Operating System :: OS Independent',
+            'Programming Language :: Python :: 3',
+            'Programming Language :: Python :: 3.8',
+            'Programming Language :: Python :: 3.9',
+            'Programming Language :: Python :: 3.10',
+            'Programming Language :: Python :: 3.11',
+            'Programming Language :: Python :: 3.12',
+        ],
+        license='Apache License 2.0',
+        tests_require=parse_requirements('requirements/tests.txt'),
+        install_requires=install_requires,
+        extras_require=extra_requires,
+        entry_points={
+            'console_scripts': ['swift=swift.cli.main:cli_main', 'megatron=swift.cli._megatron.main:cli_main']
+        },
+        dependency_links=deps_link,
+        zip_safe=False)

.ipynb_checkpoints/test_qwenOmni-checkpoint.py ADDED Viewed

	@@ -0,0 +1,177 @@

+import json
+import logging
+import os
+import re
+from dataclasses import dataclass, field
+from typing import Optional
+import torch
+from swift.llm import InferEngine, InferRequest, PtEngine, RequestConfig, get_template
+from transformers import HfArgumentParser
+from transformers import Qwen2_5OmniProcessor
+from dataset.dataset2 import AudioDataset
+@dataclass
+class TestArguments:
+    """
+    Arguments pertaining to what data we are going to input our model for training and eval.
+    """
+    MODEL_PATH = "/root/autodl-tmp/Qwen2.5-Omni-7B"  # 基础模型路径
+    LORA_PATH = "/root/autodl-tmp/output_7B_Lora/v2-20250608-171618/checkpoint-324"  # LoRA 模型路径
+    DATA_FILE = "/root/ms-swift/silence_overlaps/test"  # 测试数据文件
+    OUTPUT_DIR = "omini_inference_7B_overlap5sVal_SFT_allset.json"  # 推理结果输出目录
+    model_path: Optional[str] = field(default=MODEL_PATH, metadata={"help": "base model dir"})
+    lora_path: Optional[str] = field(default=LORA_PATH, metadata={"help": "lora model dir"})
+    out_file: Optional[str] = field(default=OUTPUT_DIR, metadata={"help": "output file for test"})
+    data_dir: Optional[str] = field(default=DATA_FILE, metadata={"help": "test data directory"})
+    DEVICE: Optional[str] = field(default="cuda:0", metadata={"help": "device to use"})
+    force: Optional[bool] = field(default=False, metadata={"help": "force test"})
+    batch_size: Optional[int] = field(default=2, metadata={"help": "Batch size for processing"})
+    def __post_init__(self):
+        if self.model_path is None:
+            raise ValueError("config path should not none")
+        if self.data_dir is None:
+            raise ValueError("data directory should not be none")
+def get_prompt_templates():
+    prompt_template = (
+        "You are an expert at analyzing overlapping speech in conversations. Please analyze the speech dialogue and focus specifically on:\n"
+        "Please summarize if any overlaps exceed the 3-second threshold."
+    )
+    return prompt_template
+def extract_overall_score(output_str):
+    """从输出中提取<overall score>X</overall score>"""
+    score_pattern = r"<overall score>(\d+)</overall score>"
+    match = re.search(score_pattern, output_str)
+    if match:
+        try:
+            return int(match.group(1))
+        except ValueError:
+            pass
+    return None
+def main():
+    parser = HfArgumentParser(TestArguments)
+    data_args = parser.parse_args_into_dataclasses()[0]
+    logging.basicConfig(level=logging.INFO, format="%(asctime)s %(levelname)s %(message)s")
+    logging.info("Starting inference with arguments: %s", data_args)
+    if not data_args.force and os.path.exists(data_args.out_file) and os.path.getsize(data_args.out_file) > 0:
+        logging.info(f"The {data_args.out_file} exists. Do not regenerate it.")
+        return
+    # 设置GPU设备
+    device = torch.device(data_args.DEVICE if torch.cuda.is_available() else "cpu")
+    logging.info(f"Using device: {device}")
+    # 初始化音频处理器
+    logging.info("Loading processor...")
+    processor = Qwen2_5OmniProcessor.from_pretrained(data_args.model_path)
+    # 初始化推理引擎
+    logging.info("Initializing inference engine...")
+    engine = PtEngine(data_args.model_path, adapters=[data_args.lora_path])
+    engine.processor = processor
+    template = get_template(engine.model.model_meta.template, processor, default_system="You are a helpful assistant.")
+    engine.default_template = template
+    template.processor = processor
+    # 初始化数据集
+    logging.info("Initializing dataset from %s", data_args.data_dir)
+    dataset = AudioDataset(data_args.data_dir)
+    logging.info(f"Dataset loaded successfully with {len(dataset)} samples")
+    # 获取提示模板
+    prompt_template = get_prompt_templates()
+    all_outputs = []
+    batch_size = data_args.batch_size
+    total_batches = (len(dataset) + batch_size - 1) // batch_size
+    logging.info(f"Starting batch processing with batch size {batch_size}, total batches: {total_batches}")
+    for i in range(0, len(dataset), batch_size):
+        current_batch = i // batch_size + 1
+        logging.info(f"Processing batch {current_batch}/{total_batches}")
+        batch_data = [dataset[j] for j in range(i, min(i + batch_size, len(dataset)))]
+        # Process each sample
+        batch_outputs = []
+        for bd in batch_data:
+            # 构建推理请求
+            infer_request = InferRequest(
+                messages=bd["prompt"],
+                audios=[bd["audio"]]
+            )
+            # 设置推理配置
+            request_config = RequestConfig(
+                max_tokens=512,
+                temperature=0,
+                do_sample=False,
+                num_beams=1
+            )
+            # 执行推理
+            resp_list = engine.infer([infer_request], request_config)
+            response = resp_list[0].choices[0].message.content
+            batch_outputs.append(response)
+        all_outputs.extend(batch_outputs)
+        logging.info(f"Completed batch {current_batch}/{total_batches}")
+    final_output = []
+    correct_count = 0
+    total_count = 0
+    true_positive = 0
+    false_positive = 0
+    false_negative = 0
+    for input_example, model_output in zip(dataset, all_outputs):
+        pred_score = extract_overall_score(model_output)
+        gt_score = input_example.get("solution", None)
+        result = {
+            "id": input_example.get("id", None),
+            "gt_score": gt_score,
+            "model_output": model_output,
+            "predicted_score": pred_score
+        }
+        final_output.append(result)
+        if pred_score is not None and gt_score is not None:
+            total_count += 1
+            if pred_score == gt_score:
+                correct_count += 1
+                true_positive += 1
+            else:
+                false_positive += 1
+                false_negative += 1
+    accuracy = correct_count / total_count if total_count > 0 else 0
+    precision = true_positive / (true_positive + false_positive) if (true_positive + false_positive) > 0 else 0
+    recall = true_positive / (true_positive + false_negative) if (true_positive + false_negative) > 0 else 0
+    # 添加准确率指标到最终输出
+    metrics = {
+        "accuracy": accuracy,
+        "precision": precision,
+        "recall": recall,
+        "correct_count": correct_count,
+        "total_count": total_count
+    }
+    final_output.append({"metrics": metrics})
+    logging.info("Saving results to %s", data_args.out_file)
+    with open(data_args.out_file, "w") as f:
+        json.dump(final_output, f, indent=2)
+    logging.info(f"Results saved successfully.")
+    logging.info(f"准确率: {accuracy:.4f} ({correct_count}/{total_count})")
+    logging.info(f"召回率: {recall:.4f}")
+    logging.info(f"精确率: {precision:.4f}")
+if __name__ == "__main__":
+    main()

.ipynb_checkpoints/train-checkpoint.sh ADDED Viewed

	@@ -0,0 +1,64 @@

+CUDA_VISIBLE_DEVICES=0 swift sft \
+    --model /root/autodl-tmp/output_7B_FULL_4JOB/v2-20250621-170947/checkpoint-608 \
+    --dataset ./dataset_newCOTSFT1_filtered_90.0s_resampled_16000.jsonl \
+    --model_type qwen2_5_omni\
+    --train_type full \
+    --output_dir /root/autodl-tmp/output_7B_FULL_cotSFT \
+    --torch_dtype bfloat16 \
+    --learning_rate 1e-4 \
+    --num_train_epochs 2 \
+    --freeze_vit false \
+    --freeze_aligner false \
+    --per_device_train_batch_size 1 \
+    --per_device_eval_batch_size 1 \
+    # ...
+#CUDA_VISIBLE_DEVICES=0 swift sft \
+# --model /root/autodl-tmp/Qwen2.5-Omni-7B \
+# --dataset /root/ms-swift/dataset_cotSFT.json \
+# --model_type qwen2_5_omni\
+# --train_type lora \
+# --output_dir /root/autodl-tmp/output_7B_Lora_cotSFT \
+# --torch_dtype bfloat16 \
+# --learning_rate 1e-4 \
+# --lora_rank 8 \
+# --lora_alpha 32 \
+# --target_modules all-linear \
+# --num_train_epochs 3 \
+# --freeze_vit false \
+# --freeze_aligner false \
+# --per_device_train_batch_size 3 \
+# --per_device_eval_batch_size 1 \
+    # ...
+# # 8*A100
+# NPROC_PER_NODE=8 \
+# CUDA_VISIBLE_DEVICES=0,1,2,3,4,5,6,7 \
+# swift pt \
+#     --model Qwen/Qwen2.5-7B \
+#     --dataset swift/chinese-c4 \
+#     --streaming true \
+#     --train_type full \
+#     --deepspeed zero2 \
+#     --output_dir output \
+#     --max_steps 10000 \
+#     ...
+    # --lora_rank 8 \
+    # --lora_alpha 32 \
+    # --target_modules all-linear \
+    # --gradient_accumulation_steps 16 \
+    # --eval_steps 50 \
+    # --save_steps 50 \
+    # --save_total_limit 2 \
+    # --logging_steps 5 \
+    # --max_length 2048 \
+    # --output_dir output \
+    # --system 'You are a helpful assistant.' \
+    # --warmup_ratio 0.05 \
+    # --dataloader_num_workers 4 \
+    # --model_author swift \
+    # --model_name swift-robot

4JOB/.ipynb_checkpoints/process_overlaps-checkpoint.py ADDED Viewed

	@@ -0,0 +1,106 @@

+import json
+import os
+import random
+def seconds_to_mmss(seconds):
+    minutes = int(seconds // 60)
+    seconds = int(seconds % 60)
+    return f"{minutes:02d}:{seconds:02d}"
+# Templates for overlap descriptions
+OVERLAP_TEMPLATES = [
+    "An overlap where multiple speakers talk simultaneously for more than 3 seconds starts at {start} and ends at {end}.",
+    "The overlap starts at {start} and ends at {end}.",
+    "Multiple speakers talk simultaneously from {start} to {end}, an overlap lasting over three seconds.",
+    "A conversation overlap occurs between {start} and {end}, with multiple people speaking at once.",
+    "There is a significant overlap in the conversation from {start} to {end}, where speakers talk simultaneously.",
+    "During the period {start}-{end}, multiple participants speak at the same time.",
+    "A 3+ second overlap is detected between {start} and {end}, with concurrent speakers.",
+    "The conversation features overlapping speech from {start} to {end}.",
+    "Several speakers talk over each other between {start} and {end}.",
+    "An overlapping segment is identified from {start} to {end}, lasting more than 3 seconds."
+]
+# Templates for no overlap case
+NO_OVERLAP_TEMPLATES = [
+    "No significant overlaps found in this conversation.",
+    "The conversation shows no overlapping speech segments longer than 3 seconds.",
+    "No instances of multiple speakers talking simultaneously for more than 3 seconds were detected.",
+    "The dialogue proceeds without any significant overlaps between speakers.",
+    "No overlapping segments exceeding 3 seconds were identified in this conversation.",
+    "The speakers maintain clear turn-taking without significant overlaps.",
+    "This conversation features no substantial overlapping speech periods.",
+    "No overlapping speech segments of 3 seconds or longer were found.",
+    "The conversation flows smoothly without any significant speaker overlaps.",
+    "All speakers maintain clear speaking turns without substantial overlap."
+]
+# Templates for correct overlap case
+CORRECT_OVERLAP_TEMPLATES = [
+    "The conversation contains overlapping speech segments longer than 3 seconds.",
+    "The dialogue features instances where speakers overlap for more than 3 seconds.",
+    "The recording includes multiple overlapping exchanges exceeding 3 seconds.",
+    "There are clear overlaps in speech lasting beyond 3 seconds.",
+    "Speaker interruptions or overlaps exceed 3 seconds in duration.",
+    "Clear evidence shows speech overlaps extending beyond 3 seconds in duration.",
+    "Speaker overlap durations consistently breach the 3-second threshold.",
+    "Recorded overlaps between speakers routinely last longer than 3 seconds.",
+    "The interaction contains several instances where voices overlap for over 3 seconds."
+]
+file = "silence"
+def process_overlap_segments():
+    # Read the overlap_5s_716.json file
+    with open(f'{file}.json', 'r', encoding='utf-8') as f:
+        overlap_data = json.load(f)
+    # List to store results for all conversations
+    results = []
+    # Process each conversation
+    for conversation_id, conversation in overlap_data.items():
+        segments = conversation.get('segments', [])
+        overlap_periods = []
+        audio_path = conversation.get('stereo_audio', [])
+        # Find overlaps > 5s between segments
+        for i in range(len(segments) - 1):
+            current = segments[i]
+            next_segment = segments[i + 1]
+            # Calculate overlap
+            overlap_start = max(current['start_time'], next_segment['start_time'])
+            overlap_end = min(current['end_time'], next_segment['end_time'])
+            overlap_duration = overlap_end - overlap_start
+            # If overlap is more than 5 seconds
+            if overlap_duration >= 3:
+                overlap_periods.append(f"{seconds_to_mmss(overlap_start)}-{seconds_to_mmss(overlap_end)}")
+        # Create result entry with random template
+        if overlap_periods:
+            # For each overlap period, select a random template
+            overlap_descriptions = []
+            for period in overlap_periods:
+                start, end = period.split('-')
+                template = random.choice(OVERLAP_TEMPLATES)
+                overlap_descriptions.append(template.format(start=start, end=end))
+            model_output = " ".join(overlap_descriptions)
+        else:
+            model_output = random.choice(NO_OVERLAP_TEMPLATES)
+        result = {
+            "key": conversation_id,
+            "audio_url": audio_path,
+            "model_output": model_output
+        }
+        results.append(result)
+    # Save the results to a JSON file
+    output_file = f'{file}_overlapgap.json'
+    with open(output_file, 'w', encoding='utf-8') as f:
+        json.dump(results, f, indent=2, ensure_ascii=False)
+    print(f"Processed {len(results)} conversations")
+    print(f"Results written to {output_file}")
+if __name__ == "__main__":
+    process_overlap_segments()

4JOB/filter.py ADDED Viewed

	@@ -0,0 +1,132 @@

+import json
+import os
+from datetime import datetime
+def filter_by_duration(input_file, output_file, min_duration=30, max_duration=90):
+    """
+    过滤JSON文件，只保留total_duration在[min_duration, max_duration]范围内的条目
+    并记录被删除的文件信息到日志文件
+    :param input_file: 输入JSON文件路径
+    :param output_file: 输出JSON文件路径
+    :param min_duration: 最小持续时间（秒）
+    :param max_duration: 最大持续时间（秒）
+    """
+    # 创建日志目录
+    log_dir = os.path.join(os.path.dirname(output_file), "filter_logs")
+    if not os.path.exists(log_dir):
+        os.makedirs(log_dir)
+    # 创建日志文件（以当前时间命名）
+    timestamp = datetime.now().strftime("%Y%m%d_%H%M%S")
+    log_file = os.path.join(log_dir, f"removed_entries_{timestamp}.log")
+    # 加载原始JSON文件
+    with open(input_file, 'r', encoding='utf-8') as f:
+        data = json.load(f)
+    # 初始化过滤结果和删除列表
+    filtered_data = {}
+    removed_entries = []
+    # 过滤数据并记录被删除的条目
+    for key, value in data.items():
+        if 'total_duration' in value and min_duration <= value['total_duration'] <= max_duration:
+            filtered_data[key] = value
+        else:
+            duration = value.get('total_duration', 'N/A')
+            removed_entries.append({
+                'key': key,
+                'duration': duration,
+                'original_dialog_id': value.get('original_dialog_id', 'N/A'),
+                'reason': 'too_short' if isinstance(duration, (int, float)) and duration < min_duration
+                         else 'too_long' if isinstance(duration, (int, float)) and duration > max_duration
+                         else 'missing_or_invalid'
+            })
+    # 保存过滤后的结果
+    with open(output_file, 'w', encoding='utf-8') as f:
+        json.dump(filtered_data, f, indent=2, ensure_ascii=False)
+    # 保存删除日志
+    with open(log_file, 'w', encoding='utf-8') as f:
+        f.write(f"Filtering log - {timestamp}\n")
+        f.write(f"Input file: {input_file}\n")
+        f.write(f"Output file: {output_file}\n")
+        f.write(f"Duration range: {min_duration}s to {max_duration}s\n\n")
+        f.write("Removed Entries:\n")
+        f.write("="*50 + "\n")
+        for entry in removed_entries:
+            f.write(f"Key: {entry['key']}\n")
+            f.write(f"Original Dialog ID: {entry['original_dialog_id']}\n")
+            f.write(f"Duration: {entry['duration']}s\n")
+            f.write(f"Reason: {entry['reason']}\n")
+            f.write("-"*50 + "\n")
+    print(f"\n处理结果: {os.path.basename(input_file)}")
+    print(f"原始条目数: {len(data)}")
+    print(f"过滤后条目数: {len(filtered_data)}")
+    print(f"已删除 {len(removed_entries)} 个不符合时长要求的条目")
+    print(f"过滤后的数据已保存到: {output_file}")
+    print(f"删除条目日志已保存到: {log_file}")
+def process_directory(input_dir, output_dir, min_duration=30, max_duration=90):
+    """
+    处理目录中的所有JSON文件
+    """
+    if not os.path.exists(output_dir):
+        os.makedirs(output_dir)
+    # 创建总日志文件
+    timestamp = datetime.now().strftime("%Y%m%d_%H%M%S")
+    summary_log = os.path.join(output_dir, f"summary_removed_entries_{timestamp}.log")
+    total_removed = 0
+    total_processed = 0
+    with open(summary_log, 'w', encoding='utf-8') as summary_f:
+        summary_f.write(f"Summary Filtering Log - {timestamp}\n")
+        summary_f.write(f"Input directory: {input_dir}\n")
+        summary_f.write(f"Output directory: {output_dir}\n")
+        summary_f.write(f"Duration range: {min_duration}s to {max_duration}s\n\n")
+        for filename in os.listdir(input_dir):
+            if filename.endswith('.json'):
+                input_path = os.path.join(input_dir, filename)
+                output_path = os.path.join(output_dir, filename)
+                print(f"\n处理文件: {filename}")
+                filter_by_duration(input_path, output_path, min_duration, max_duration)
+                # 读取单个文件日志以获取统计信息
+                log_dir = os.path.join(output_dir, "filter_logs")
+                latest_log = max(
+                    [f for f in os.listdir(log_dir) if f.startswith('removed_entries')],
+                    key=lambda f: os.path.getmtime(os.path.join(log_dir, f)))
+                with open(os.path.join(log_dir, latest_log), 'r', encoding='utf-8') as log_f:
+                    log_content = log_f.read()
+                    removed_count = log_content.count("Key: ")
+                summary_f.write(f"\nFile: {filename}\n")
+                summary_f.write(f"Removed entries: {removed_count}\n")
+                summary_f.write("-"*40 + "\n")
+                total_removed += removed_count
+                total_processed += 1
+        summary_f.write(f"\nTotal files processed: {total_processed}\n")
+        summary_f.write(f"Total entries removed: {total_removed}\n")
+    print(f"\n处理完成！所有文件的总日志已保存到: {summary_log}")
+if __name__ == "__main__":
+    # 使用示例 - 处理单个文件
+    input_json = "silence.json"  # 替换为你的输入文件路径
+    output_json = "silence_filtered_output.json"  # 输出文件路径
+    filter_by_duration(input_json, output_json)
+    # 使用示例 - 处理整个目录
+    # input_directory = "./input_4JOB_overlap"  # 替换为你的输入目录
+    # output_directory = "./filtered_output"  # 替换为你的输出目录
+    # process_directory(input_directory, output_directory)

4JOB/process_overlaps.py ADDED Viewed

	@@ -0,0 +1,106 @@

+import json
+import os
+import random
+def seconds_to_mmss(seconds):
+    minutes = int(seconds // 60)
+    seconds = int(seconds % 60)
+    return f"{minutes:02d}:{seconds:02d}"
+# Templates for overlap descriptions
+OVERLAP_TEMPLATES = [
+    "An overlap where multiple speakers talk simultaneously for more than 3 seconds starts at {start} and ends at {end}.",
+    "The overlap starts at {start} and ends at {end}.",
+    "Multiple speakers talk simultaneously from {start} to {end}, an overlap lasting over three seconds.",
+    "A conversation overlap occurs between {start} and {end}, with multiple people speaking at once.",
+    "There is a significant overlap in the conversation from {start} to {end}, where speakers talk simultaneously.",
+    "During the period {start}-{end}, multiple participants speak at the same time.",
+    "A 3+ second overlap is detected between {start} and {end}, with concurrent speakers.",
+    "The conversation features overlapping speech from {start} to {end}.",
+    "Several speakers talk over each other between {start} and {end}.",
+    "An overlapping segment is identified from {start} to {end}, lasting more than 3 seconds."
+]
+# Templates for no overlap case
+NO_OVERLAP_TEMPLATES = [
+    "No significant overlaps found in this conversation.",
+    "The conversation shows no overlapping speech segments longer than 3 seconds.",
+    "No instances of multiple speakers talking simultaneously for more than 3 seconds were detected.",
+    "The dialogue proceeds without any significant overlaps between speakers.",
+    "No overlapping segments exceeding 3 seconds were identified in this conversation.",
+    "The speakers maintain clear turn-taking without significant overlaps.",
+    "This conversation features no substantial overlapping speech periods.",
+    "No overlapping speech segments of 3 seconds or longer were found.",
+    "The conversation flows smoothly without any significant speaker overlaps.",
+    "All speakers maintain clear speaking turns without substantial overlap."
+]
+# Templates for correct overlap case
+CORRECT_OVERLAP_TEMPLATES = [
+    "The conversation contains overlapping speech segments longer than 3 seconds.",
+    "The dialogue features instances where speakers overlap for more than 3 seconds.",
+    "The recording includes multiple overlapping exchanges exceeding 3 seconds.",
+    "There are clear overlaps in speech lasting beyond 3 seconds.",
+    "Speaker interruptions or overlaps exceed 3 seconds in duration.",
+    "Clear evidence shows speech overlaps extending beyond 3 seconds in duration.",
+    "Speaker overlap durations consistently breach the 3-second threshold.",
+    "Recorded overlaps between speakers routinely last longer than 3 seconds.",
+    "The interaction contains several instances where voices overlap for over 3 seconds."
+]
+file = "silence"
+def process_overlap_segments():
+    # Read the overlap_5s_716.json file
+    with open(f'{file}.json', 'r', encoding='utf-8') as f:
+        overlap_data = json.load(f)
+    # List to store results for all conversations
+    results = []
+    # Process each conversation
+    for conversation_id, conversation in overlap_data.items():
+        segments = conversation.get('segments', [])
+        overlap_periods = []
+        audio_path = conversation.get('stereo_audio', [])
+        # Find overlaps > 5s between segments
+        for i in range(len(segments) - 1):
+            current = segments[i]
+            next_segment = segments[i + 1]
+            # Calculate overlap
+            overlap_start = max(current['start_time'], next_segment['start_time'])
+            overlap_end = min(current['end_time'], next_segment['end_time'])
+            overlap_duration = overlap_end - overlap_start
+            # If overlap is more than 5 seconds
+            if overlap_duration >= 3:
+                overlap_periods.append(f"{seconds_to_mmss(overlap_start)}-{seconds_to_mmss(overlap_end)}")
+        # Create result entry with random template
+        if overlap_periods:
+            # For each overlap period, select a random template
+            overlap_descriptions = []
+            for period in overlap_periods:
+                start, end = period.split('-')
+                template = random.choice(OVERLAP_TEMPLATES)
+                overlap_descriptions.append(template.format(start=start, end=end))
+            model_output = " ".join(overlap_descriptions)
+        else:
+            model_output = random.choice(NO_OVERLAP_TEMPLATES)
+        result = {
+            "key": conversation_id,
+            "audio_url": audio_path,
+            "model_output": model_output
+        }
+        results.append(result)
+    # Save the results to a JSON file
+    output_file = f'{file}_overlapgap.json'
+    with open(output_file, 'w', encoding='utf-8') as f:
+        json.dump(results, f, indent=2, ensure_ascii=False)
+    print(f"Processed {len(results)} conversations")
+    print(f"Results written to {output_file}")
+if __name__ == "__main__":
+    process_overlap_segments()

4JOB/process_silence.py ADDED Viewed

	@@ -0,0 +1,84 @@

+import json
+import os
+import random
+def seconds_to_mmss(seconds):
+    minutes = int(seconds // 60)
+    seconds = int(seconds % 60)
+    return f"{minutes:02d}:{seconds:02d}"
+# Templates for silence gap descriptions
+SILENCE_TEMPLATES = [
+    "Silence gaps longer than 3 seconds occur at: {gaps}",
+    "The conversation contains significant pauses at: {gaps}",
+    "There are silent periods of more than 3 seconds at: {gaps}",
+    "The dialogue features extended pauses at: {gaps}",
+    "Silent intervals exceeding 3 seconds are found at: {gaps}",
+    "The conversation includes notable gaps at: {gaps}",
+    "Extended periods of silence occur at: {gaps}",
+    "The dialogue has significant breaks at: {gaps}",
+    "Silent segments longer than 3 seconds appear at: {gaps}",
+    "The conversation shows substantial pauses at: {gaps}"
+]
+# Templates for no silence case
+NO_SILENCE_TEMPLATES = [
+    "No silence gaps longer than 3 seconds were found in this conversation.",
+    "The conversation flows continuously without significant pauses.",
+    "No extended periods of silence were detected in this dialogue.",
+    "The conversation maintains a steady pace without notable gaps.",
+    "No silent intervals exceeding 3 seconds were identified.",
+    "The dialogue proceeds without substantial pauses.",
+    "No significant breaks in conversation were observed.",
+    "The conversation shows no extended silent periods.",
+    "No notable gaps in speech were detected.",
+    "The dialogue continues without significant silent intervals."
+]
+file = "silence"
+def process_silence_gaps():
+    # Read the overlap_5s_716.json file
+    with open(f'{file}.json', 'r', encoding='utf-8') as f:
+        silence_data = json.load(f)
+    # List to store results for all conversations
+    results = []
+    # Process each conversation
+    for conversation_id, conversation in silence_data.items():
+        segments = conversation.get('segments', [])
+        audio_path = conversation.get('stereo_audio', [])
+        silence_gaps = []
+        # Find silence gaps > 3s between segments
+        for i in range(len(segments) - 1):
+            current_end = segments[i]['end_time']
+            next_start = segments[i + 1]['start_time']
+            gap_duration = next_start - current_end
+            if gap_duration > 3:
+                silence_gaps.append(f"{seconds_to_mmss(current_end)}-{seconds_to_mmss(next_start)}")
+        # Create result entry with random template
+        if silence_gaps:
+            template = random.choice(SILENCE_TEMPLATES)
+            model_output = template.format(gaps=', '.join(silence_gaps))
+        else:
+            model_output = random.choice(NO_SILENCE_TEMPLATES)
+        result = {
+            "key": conversation_id,
+            "audio_url": audio_path,
+            "model_output": model_output
+        }
+        results.append(result)
+    # Save the results to a JSON file
+    output_file = f'{file}_silencegap.json'
+    with open(output_file, 'w', encoding='utf-8') as f:
+        json.dump(results, f, indent=2, ensure_ascii=False)
+    print(f"Processed {len(results)} conversations")
+    print(f"Results written to {output_file}")
+if __name__ == "__main__":
+    process_silence_gaps()

4JOB/process_transcription.py ADDED Viewed

	@@ -0,0 +1,80 @@

+import json
+def seconds_to_mmss(seconds):
+    minutes = int(seconds // 60)
+    seconds = int(seconds % 60)
+    return f"{minutes:02d}:{seconds:02d}"
+filename = "silence"
+def is_overlapping(current_segment, other_segments):
+    """Check if the current segment overlaps with any other segment."""
+    current_start = current_segment['start_time']
+    current_end = current_segment['end_time']
+    for segment in other_segments:
+        if segment == current_segment:
+            continue
+        other_start = segment['start_time']
+        other_end = segment['end_time']
+        # Check if there's an overlap
+        if (current_start < other_end and current_end > other_start):
+            return True
+    return False
+def process_transcriptions():
+    # Read the overlap_5s_716.json file
+    with open(f'./{filename}.json', 'r', encoding='utf-8') as f:
+        data = json.load(f)
+    # List to store results for all conversations
+    results = []
+    # Process each conversation
+    for conversation_id, conversation in data.items():
+        segments = conversation.get('segments', [])
+        audio_path = conversation.get('stereo_audio', [])
+        # Sort segments by start time
+        segments.sort(key=lambda x: x['start_time'])
+        # Process each segment and create transcription lines
+        transcription_lines = []
+        for segment in segments:
+            speaker = segment['speaker']
+            start_time = segment['start_time']
+            end_time = segment['end_time']
+            text = segment['text']
+            original_text = segment['original_text']
+            original_text = original_text.replace("[interrupt] ", "").strip()
+            # Format timestamp
+            timestamp = f"[{seconds_to_mmss(start_time)} - {seconds_to_mmss(end_time)}]"
+            # Check if this segment overlaps with any other segment
+            has_overlap = is_overlapping(segment, segments)
+            # Format the line
+            if has_overlap:
+                line = f"{timestamp} Speaker {speaker}: {original_text}"
+            else:
+                line = f"{timestamp} Speaker {speaker}: {text}"
+            transcription_lines.append(line)
+        # Create result entry
+        result = {
+            "key": conversation_id,
+            "audio_url": audio_path,
+            "model_output": "\n".join(transcription_lines)
+        }
+        results.append(result)
+    # Save the results to a JSON file
+    output_file = f'./{filename}_transcription.json'
+    with open(output_file, 'w', encoding='utf-8') as f:
+        json.dump(results, f, indent=2, ensure_ascii=False)
+if __name__ == "__main__":
+    process_transcriptions()