import json
import os
import random
from typing import Optional


def get_prompt_for_file(filename: str) -> Optional[str]:
    """Return the task prompt selected by a keyword found in *filename*.

    Keywords are tested in a fixed order and the first substring match wins:
    ``overlapgap``, ``silencegap``, ``speaker``, ``transcription``.  The
    prompt constants are module-level names that are looked up only when the
    corresponding keyword matches.

    Args:
        filename: File name (or path) to inspect.

    Returns:
        The matching prompt string, or ``None`` when no keyword occurs in
        *filename*.
    """
    if 'overlapgap' in filename:
        return overlap_prompt
    if 'silencegap' in filename:
        return silence_prompt
    if 'speaker' in filename:
        return speaker_prompt
    if 'transcription' in filename:
        return transcript_prompt
    return None


# Path of the output .jsonl file.
output_path = "/root/autodl-tmp/ms-swift/dataset_newCOTSFT1.jsonl"

# with open(input_path, "r") as fin:
#     input_data = json.load(fin)

# Prompt asking for a 1/3/5 rating of the second sentence of a two-sentence
# dialogue recording.
# NOTE(review): the empty backtick pair after "must include" looks like a
# placeholder whose content is missing -- confirm the intended text.
www = (
    "# Dialogue Response Evaluation\n\n"
    "**IMPORTANT:** Evaluation must include`` rating.\n\n"
    "Listen to the dialogue recording (two sentences, 1-second pause in between). Evaluate the quality of the **second sentence** as a response to the first, focusing on **text relevance** and the **appropriateness** of **Linguistic information (a range of paralinguistic information such as emotion/age/pitch/speed/volume)**.\n"
    "**Note:** Focus on evaluating the appropriateness of the second sentence relative to the first, even if the first sentence itself contains contradictory information.\n\n"
    "## Scoring Criteria\n\n"
    "**1 points**: Text content is irrelevant or incorrect or illogical.(low intelligence)\n"
    "**3 points**: Text is relevant, but paralinguistic information is **inappropriate** for the context.(low emotional quotient)\n"
    "**5 points**: Text is relevant, and paralinguistic information is **appropriate** for the context, resulting in effective communication.(High intelligence and emotional intelligence.)\n\n"
    "## Evaluation Requirements\n\n"
    "Response **MUST** follow this format:\n\n"
    "X (**X is 1, 3, or 5**)\n\n")

# www = (
#     "# Interactional Dialogue Evaluation\n\n"
#     "**IMPORTANT**: Evaluation must include `` and `` analysis and `` rating.\n"
#     "Listen to a two-person interactional dialogue speech (Dual-channel audio, with each channel representing one speaker), labeled as speakers A and B. Evaluate the quality of the interaction, focusing on:\n"
#     "**Response Relevance:** \n"
#     "**logical consistency, topic coherence**\n"
#     "**Interactional Fluency:**\n"
#     "**Strictly detect dual-tracked vocal overlap >3s (cross-channel analysis)**\n"
#     "**Pauses >5s between turns (must evaluate) \n\n**"
#     "**Note**: Small pauses and brief overlaps in audio are acceptable, while prolonged pauses and overlapping audio are harmful. You should consider Response Relevance and Interactional Fluency separately, and provide the corresponding thinking process.\n\n"
#     "## Scoring Criteria\n"
#     "Assign a single holistic score based on the combined evaluation:\n"
#     "`1` (Poor): Significant issues in either **Response Relevance ** or **Interactional Fluency. **\n"
#     "`2` (Excellent): Both **Response Relevance ** and **Interactional Fluency ** are consistently appropriate and natural.\n"
#     "## Evaluation Output Format:\n"
#     "Strictly follow this template:\n"
#     "\n"
#     "[Analysing Response Relevance and giving reasons for scoring...]\n"
#     "\n"
#     "\n"
#     "[Analysing Interactional Fluency and giving reasons for scoring.]\n"
#     "\n"
#     "X\n"
# )

# www = (
#     "# Interactional Dialogue Evaluation\n\n"
#     "**IMPORTANT**: Evaluation must include `` and `` analysis and `` rating.\n"
#     "Listen to a two-person interactional dialogue speech (Dual-channel audio, with each channel representing one speaker), labeled as speakers A and B. Evaluate the quality of the interaction, focusing on:\n"
#     "**Response Relevance:** \n"
#     "**logical consistency, topic coherence**\n"
#     "**Interactional Fluency:**\n"
#     "**Strictly detect dual-tracked vocal overlap >3s (cross-channel analysis)**\n"
#     "**Pauses >5s between turns (must evaluate) \n\n**"
#     "**Note**: Small pauses and brief overlaps in audio are acceptable, while prolonged pauses and overlapping audio are harmful.
#     You should consider Response Relevance and Interactional Fluency separately, and provide the corresponding thinking process.\n\n"
#     "## Scoring Criteria\n"
#     "Assign a single holistic score based on the combined evaluation:\n"
#     "`1` (Poor): Significant issues in either **Response Relevance ** or **Interactional Fluency. **\n"
#     "`2` (Excellent): Both **Response Relevance ** and **Interactional Fluency ** are consistently appropriate and natural.\n"
#     "## Evaluation Output Format:\n"
#     "Strictly follow this template:\n"
#     "\n"
#     "[Analysing Response Relevance and giving reasons for scoring...]\n"
#     "\n"
#     "\n"
#     "[Analysing Interactional Fluency and giving reasons for scoring.]\n"
#     "\n"
#     "X\n"
# )

# Task prompts. Each one is built from adjacent string literals, so the
# pieces concatenate into a single string.

# Asks for the start/end time of overlapping speech longer than 3 seconds.
overlap_prompt = (
    "Analyze the dual-channel audio and identify segments where multiple speakers are talking simultaneously for more than 3 seconds. \n"
    "Simply tell me when the overlap starts and ends in MM:SS format. \n"
    "Just one simple sentence about the overlap timing. Keep the word count within 40 words."
)

# Asks only whether overlapping speech longer than 3 seconds exists.
overlap_prompt_yes = (
    "Analyze the dual-channel audio and identify segments where multiple speakers are talking simultaneously for more than 3 seconds. \n"
    "Simply tell me whether the audio has overlap segments. \n"
    "Just one simple sentence about the overlap segments. Keep the word count within 40 words."
)

# Asks for the start/end time of silences longer than 3 seconds.
silence_prompt = (
    "Analyze the dual-channel audio and identify segments where multiple speakers are silent for more than 3 seconds. \n"
    "Simply tell me when the silence starts and ends in MM:SS format. \n"
    "Just one simple sentence about the silence timing. Keep the word count within 40 words."
)

# Asks for per-speaker speaking segments.
speaker_prompt = (
    "Analyze the dual-channel audio and detect individual speakers. \n"
    "List the speaking segments for each speaker in MM:SS-MM:SS format. \n"
    "Only output speaker labels and time segments in a similar format. \n"
)

# Asks for a timestamped, per-speaker transcript.
transcript_prompt = (
    "Analyze the dual-channel audio and transcript each speaker's sentences with timestamps. \n"
    "List the speaking segments and transcript text for each speaker in MM:SS-MM:SS format. \n"
    "Only output time segments, speaker labels, and transcript text in a similar format.\n"
)

# Directory whose files are processed.
input_dir = "/root/autodl-tmp/ms-swift/input"
# Starts empty; nothing in this part of the file appends to it.
all_data = []

# Prompt for the holistic 1/2 interaction-quality rating.
# NOTE(review): the empty backtick pairs in the IMPORTANT line and the bare
# "\n" entries in the output template look like placeholders whose tag names
# are missing -- confirm the intended text before relying on this prompt.
prompt_template = (
    "# Interactional Dialogue Evaluation\n\n"
    "**IMPORTANT**: Evaluation must include `` and `` analysis and `` rating.\n"
    "Listen to a two-person interactional dialogue speech (Dual-channel audio, with each channel representing one speaker), labeled as speakers A and B. Evaluate the quality of the interaction, focusing on:\n"
    "**Response Relevance:** \n"
    "**logical consistency, topic coherence**\n"
    "**Interactional Fluency:**\n"
    "**Detect and evaluate extended vocal overlaps, e.g., cross-channel overlap.**\n"
    "**Detect and evaluate long pauses, e.g., pauses more than 3s between speaker turns.\n\n**"
    "**Note**: Small pauses and brief overlaps in audio are acceptable, while prolonged pauses and overlapping audio are harmful. You should consider Response Relevance and Interactional Fluency separately, and provide the corresponding thinking process.\n\n"
    "## Scoring Criteria\n"
    "Assign a single holistic score based on the combined evaluation:\n"
    "`1` (Poor): Significant issues in either **Response Relevance ** or **Interactional Fluency. **\n"
    "`2` (Excellent): Both **Response Relevance ** and **Interactional Fluency ** are consistently appropriate and natural.\n"
    "## Evaluation Output Format:\n"
    "Strictly follow this template:\n"
    "\n"
    "[Analysing Response Relevance and giving reasons for scoring...]\n"
    "\n"
    "\n"
    "[Analysing Interactional Fluency and giving reasons for scoring.]\n"
    "\n"
    "X\n"
)

# prompt_template4Job = (
#     "# Interactional Dialogue Evaluation\n\n"
#     "**IMPORTANT**: Evaluation must include `` and `` analysis and `` rating.\n"
#     "Listen to a two-person interactional dialogue speech (Dual-channel audio, with each channel representing one speaker), labeled as speakers A and B. Evaluate the quality of the interaction, focusing on:\n"
#     "**Response Relevance:** \n"
#     "**logical consistency, topic coherence**\n"
#     "**Interactional Fluency:**\n"
#     "**Detect and evaluate extended vocal overlaps, e.g., cross-channel overlap.**\n"
#     "**Detect and evaluate long pauses, e.g., pauses more than 3s between speaker turns.\n\n**"
#     "**Note**: Small pauses and brief overlaps in audio are acceptable, while prolonged pauses and overlapping audio are harmful. You should consider Response Relevance and Interactional Fluency separately, and provide the corresponding thinking process.\n\n"
#     "## Error Type\n"
#     "Assign a single holistic score based on the combined evaluation:\n"
#     "`1` (Correct): Responses are logically consistent and relevant.
No problematic overlaps or silences.\n" # "`2` (Detect overlap): Extended or frequent overlapping speech is present.\n" # "`3` (Detect silence): Long pauses (>3 seconds) occur between turns.\n" # "`4` (Detect text error): Responses are off-topic or disconnected.\n" # "## Evaluation Output Format:\n" # "Strictly follow this template:\n" # "\n" # "[Analysing Response Relevance and giving reasons for scoring...]\n" # "\n" # "\n" # "[Analysing Interactional Fluency and giving reasons for scoring.]\n" # "\n" # "X\n" # ) # Process each file for filename in os.listdir(input_dir): input_path = os.path.join(input_dir, filename) if not os.path.isfile(input_path): continue # Get the appropriate prompt for this file prompt = prompt_template if prompt is None: print(f"Skipping {filename} - no matching prompt found") continue # Read input data with open(input_path, "r") as fin: input_data = json.load(fin) # Process each item for item in input_data: data = { "messages": [ {"role": "user", "content": f"