Spaces:
Sleeping
Sleeping
Update annotated_casl_app.py
Browse files
- annotated_casl_app.py +59 -195
annotated_casl_app.py
CHANGED
|
@@ -118,13 +118,8 @@ def combine_sections_smartly(sections_dict):
|
|
| 118 |
"4. FIGURATIVE LANGUAGE ANALYSIS",
|
| 119 |
"5. PRAGMATIC LANGUAGE ASSESSMENT",
|
| 120 |
"6. VOCABULARY AND SEMANTIC ANALYSIS",
|
| 121 |
-
"7.
|
| 122 |
-
"8.
|
| 123 |
-
"9. COGNITIVE-LINGUISTIC FACTORS",
|
| 124 |
-
"10. FLUENCY AND RHYTHM ANALYSIS",
|
| 125 |
-
"11. QUANTITATIVE METRICS",
|
| 126 |
-
"12. CLINICAL IMPLICATIONS",
|
| 127 |
-
"13. PROGNOSIS AND SUMMARY"
|
| 128 |
]
|
| 129 |
|
| 130 |
combined_parts = []
|
|
@@ -205,8 +200,6 @@ def answer_quick_question(transcript_content, question, age, gender, slp_notes):
|
|
| 205 |
|
| 206 |
prompt = f"""
|
| 207 |
You are a speech-language pathologist answering a specific question about a speech sample.
|
| 208 |
-
|
| 209 |
-
Patient: {age}-year-old {gender}
|
| 210 |
|
| 211 |
TRANSCRIPT:
|
| 212 |
{transcript_content}{notes_section}
|
|
@@ -217,7 +210,7 @@ def answer_quick_question(transcript_content, question, age, gender, slp_notes):
|
|
| 217 |
- Provide a focused, detailed answer to the specific question asked
|
| 218 |
- Include specific examples from the transcript with exact quotes
|
| 219 |
- Provide quantitative data when relevant (counts, percentages, rates)
|
| 220 |
-
-
|
| 221 |
- Keep the response focused on the question but thorough in analysis
|
| 222 |
- If the question relates to multiple areas, address all relevant aspects
|
| 223 |
|
|
@@ -263,10 +256,9 @@ def analyze_targeted_area(transcript_content, analysis_area, age, gender, slp_no
|
|
| 263 |
- Note any fluency-enhancing contexts
|
| 264 |
- Assess overall speech rhythm and flow
|
| 265 |
|
| 266 |
-
4.
|
| 267 |
-
-
|
| 268 |
-
-
|
| 269 |
-
- Recommend intervention priorities
|
| 270 |
""",
|
| 271 |
|
| 272 |
"Grammar and Syntax": """
|
|
@@ -288,10 +280,9 @@ def analyze_targeted_area(transcript_content, analysis_area, age, gender, slp_no
|
|
| 288 |
- Count subject-verb agreement errors
|
| 289 |
- Assess auxiliary verb usage
|
| 290 |
|
| 291 |
-
4.
|
| 292 |
-
-
|
| 293 |
-
-
|
| 294 |
-
- Recommend specific therapy goals
|
| 295 |
""",
|
| 296 |
|
| 297 |
"Vocabulary and Semantics": """
|
|
@@ -317,9 +308,9 @@ def analyze_targeted_area(transcript_content, analysis_area, age, gender, slp_no
|
|
| 317 |
- Identify compensatory strategies
|
| 318 |
- Assess overall lexical access efficiency
|
| 319 |
|
| 320 |
-
5.
|
| 321 |
-
-
|
| 322 |
-
-
|
| 323 |
""",
|
| 324 |
|
| 325 |
"Pragmatics and Discourse": """
|
|
@@ -345,9 +336,9 @@ def analyze_targeted_area(transcript_content, analysis_area, age, gender, slp_no
|
|
| 345 |
- Assess contextual appropriateness
|
| 346 |
- Evaluate social awareness in communication
|
| 347 |
|
| 348 |
-
5.
|
| 349 |
-
-
|
| 350 |
-
-
|
| 351 |
""",
|
| 352 |
|
| 353 |
"Sentence Complexity": """
|
|
@@ -370,12 +361,11 @@ def analyze_targeted_area(transcript_content, analysis_area, age, gender, slp_no
|
|
| 370 |
|
| 371 |
4. SYNTACTIC MATURITY:
|
| 372 |
- Calculate Mean Length of Utterance (MLU)
|
| 373 |
-
-
|
| 374 |
-
- Identify areas for syntactic growth
|
| 375 |
|
| 376 |
-
5.
|
| 377 |
-
-
|
| 378 |
-
-
|
| 379 |
""",
|
| 380 |
|
| 381 |
"Word Finding and Retrieval": """
|
|
@@ -401,10 +391,9 @@ def analyze_targeted_area(transcript_content, analysis_area, age, gender, slp_no
|
|
| 401 |
- Assess impact of topic familiarity
|
| 402 |
- Evaluate effect of linguistic complexity on retrieval
|
| 403 |
|
| 404 |
-
5.
|
| 405 |
-
-
|
| 406 |
-
-
|
| 407 |
-
- Suggest compensatory strategy training
|
| 408 |
"""
|
| 409 |
}
|
| 410 |
|
|
@@ -413,8 +402,6 @@ def analyze_targeted_area(transcript_content, analysis_area, age, gender, slp_no
|
|
| 413 |
|
| 414 |
prompt = f"""
|
| 415 |
You are a speech-language pathologist conducting a targeted analysis of a specific area.
|
| 416 |
-
|
| 417 |
-
Patient: {age}-year-old {gender}
|
| 418 |
|
| 419 |
TRANSCRIPT:
|
| 420 |
{transcript_content}{notes_section}
|
|
@@ -426,8 +413,8 @@ def analyze_targeted_area(transcript_content, analysis_area, age, gender, slp_no
|
|
| 426 |
INSTRUCTIONS:
|
| 427 |
- Provide specific examples with exact quotes from the transcript
|
| 428 |
- Include quantitative data (counts, percentages, rates per 100 words)
|
| 429 |
-
-
|
| 430 |
-
-
|
| 431 |
- Be thorough but focused on the specified area
|
| 432 |
|
| 433 |
Conduct the targeted analysis:
|
|
@@ -487,8 +474,6 @@ def annotate_transcript(transcript_content, age, gender, slp_notes):
|
|
| 487 |
|
| 488 |
annotation_prompt = f"""
|
| 489 |
You are a speech-language pathologist preparing a transcript for detailed analysis. Your task is to ANNOTATE the ENTIRE transcript with linguistic markers at a WORD-BY-WORD level.
|
| 490 |
-
|
| 491 |
-
Patient: {age}-year-old {gender}
|
| 492 |
|
| 493 |
ORIGINAL TRANSCRIPT:
|
| 494 |
{transcript_content}{notes_section}
|
|
@@ -605,14 +590,12 @@ def analyze_annotated_transcript(annotated_transcript, age, gender, slp_notes):
|
|
| 605 |
"""
|
| 606 |
|
| 607 |
analysis_prompt = f"""
|
| 608 |
-
You are a speech-language pathologist conducting a comprehensive analysis of an annotated speech sample. Provide
|
| 609 |
|
| 610 |
-
Patient: {age}-year-old {gender}
|
| 611 |
-
|
| 612 |
ANNOTATED TRANSCRIPT:
|
| 613 |
{annotated_transcript}{notes_section}
|
| 614 |
|
| 615 |
-
INSTRUCTIONS: Complete ALL
|
| 616 |
|
| 617 |
COMPREHENSIVE SPEECH SAMPLE ANALYSIS
|
| 618 |
|
|
@@ -637,7 +620,6 @@ def analyze_annotated_transcript(annotated_transcript, age, gender, slp_notes):
|
|
| 637 |
* Semantic revisions: "car- I mean bike"
|
| 638 |
- Pauses ([PAUSE]): Count hesitation markers and silent pauses
|
| 639 |
- Total disfluency rate: Calculate combined rate per 100 words
|
| 640 |
-
- Severity assessment: Compare to age norms
|
| 641 |
|
| 642 |
B. Word Retrieval Issues (detailed analysis):
|
| 643 |
- Circumlocutions ([CIRCUMLOCUTION]): Count and analyze strategies
|
|
@@ -709,7 +691,6 @@ def analyze_annotated_transcript(annotated_transcript, age, gender, slp_notes):
|
|
| 709 |
- Clauses per utterance ratio
|
| 710 |
- Subordination index
|
| 711 |
- Coordination index
|
| 712 |
-
- Developmental appropriateness assessment
|
| 713 |
|
| 714 |
4. FIGURATIVE LANGUAGE ANALYSIS
|
| 715 |
|
|
@@ -718,7 +699,7 @@ def analyze_annotated_transcript(annotated_transcript, age, gender, slp_notes):
|
|
| 718 |
* Metaphors: "Time is money"
|
| 719 |
* Similes: "Fast as lightning"
|
| 720 |
* Idioms: "Raining cats and dogs"
|
| 721 |
-
- Appropriateness assessment: Context
|
| 722 |
- Comprehension vs. production abilities
|
| 723 |
- Abstract language development indicators
|
| 724 |
|
|
@@ -754,7 +735,7 @@ def analyze_annotated_transcript(annotated_transcript, age, gender, slp_notes):
|
|
| 754 |
- Vocabulary breadth: Range of semantic categories
|
| 755 |
- Vocabulary depth: Precision and nuance within categories
|
| 756 |
- Academic vs. conversational vocabulary ratio
|
| 757 |
-
-
|
| 758 |
|
| 759 |
7. NLP-DERIVED LINGUISTIC FEATURES (use bullet lists, NO tables)
|
| 760 |
|
|
@@ -769,13 +750,13 @@ def analyze_annotated_transcript(annotated_transcript, age, gender, slp_notes):
|
|
| 769 |
* Provide exact MTLD score and interpretation
|
| 770 |
- Hypergeometric Distribution D (HDD): Probability-based diversity measure
|
| 771 |
* Controls for text length effects
|
| 772 |
-
* Provide HDD score
|
| 773 |
|
| 774 |
B. Word Frequency Analysis (as bullet list, not table):
|
| 775 |
- Most frequent words used: List top 10 as "word (count)" format
|
| 776 |
- High-frequency vs. low-frequency word distribution
|
| 777 |
- Function words vs. content words ratio
|
| 778 |
-
- Repetitive word patterns
|
| 779 |
|
| 780 |
C. Linguistic Complexity Indicators (bullet format):
|
| 781 |
- Average word length in syllables
|
|
@@ -789,7 +770,7 @@ def analyze_annotated_transcript(annotated_transcript, age, gender, slp_notes):
|
|
| 789 |
- Derivational morphology: Prefixes and suffixes
|
| 790 |
- Inflectional morphology: Tense, number, case markers
|
| 791 |
- Morphological awareness indicators
|
| 792 |
-
- Error patterns
|
| 793 |
|
| 794 |
B. Phonological Considerations:
|
| 795 |
- Sound pattern analysis (if evident in transcript)
|
|
@@ -811,75 +792,13 @@ def analyze_annotated_transcript(annotated_transcript, age, gender, slp_notes):
|
|
| 811 |
C. Executive Function Evidence:
|
| 812 |
- Self-monitoring and error correction
|
| 813 |
- Planning and organization in discourse
|
| 814 |
-
- Cognitive flexibility in topic management
|
| 815 |
-
|
| 816 |
-
10. FLUENCY AND RHYTHM ANALYSIS
|
| 817 |
-
|
| 818 |
-
A. Disfluency Patterns:
|
| 819 |
-
- Total disfluency count and rate per 100 words
|
| 820 |
-
- Disfluency type distribution
|
| 821 |
-
- Clustering patterns and severity assessment
|
| 822 |
-
- Impact on communication effectiveness
|
| 823 |
-
|
| 824 |
-
B. Speech Flow and Rhythm:
|
| 825 |
-
- Natural pause patterns vs. disrupted flow
|
| 826 |
-
- Rhythm and prosodic patterns (if evident)
|
| 827 |
-
- Overall fluency profile and age-appropriateness
|
| 828 |
-
|
| 829 |
-
11. QUANTITATIVE METRICS
|
| 830 |
-
|
| 831 |
-
A. Basic Measures:
|
| 832 |
-
- Total words: [exact count]
|
| 833 |
-
- Total sentences: [exact count]
|
| 834 |
-
- Unique words: [exact count]
|
| 835 |
-
- MLU words: [calculation with formula shown]
|
| 836 |
-
- MLU morphemes: [calculation with formula shown]
|
| 837 |
-
- Type-Token Ratio: [calculation and interpretation]
|
| 838 |
-
|
| 839 |
-
B. Error Rates and Ratios:
|
| 840 |
-
- Disfluency rate per 100 words
|
| 841 |
-
- Grammatical accuracy percentage
|
| 842 |
-
- Vocabulary sophistication ratio
|
| 843 |
-
- Sentence complexity distribution percentages
|
| 844 |
-
|
| 845 |
-
12. CLINICAL IMPLICATIONS
|
| 846 |
-
|
| 847 |
-
A. Strengths (ranked by prominence):
|
| 848 |
-
- Primary strengths with supporting evidence
|
| 849 |
-
- Secondary strengths with examples
|
| 850 |
-
- Compensatory strategies observed
|
| 851 |
-
|
| 852 |
-
B. Areas of Need (prioritized by severity):
|
| 853 |
-
- Primary concerns with impact assessment
|
| 854 |
-
- Secondary concerns with supporting data
|
| 855 |
-
- Developmental vs. disorder considerations
|
| 856 |
-
|
| 857 |
-
C. Treatment Recommendations:
|
| 858 |
-
- Specific, measurable therapy goals
|
| 859 |
-
- Intervention approaches and techniques
|
| 860 |
-
- Frequency and duration recommendations
|
| 861 |
-
- Progress monitoring strategies
|
| 862 |
-
|
| 863 |
-
13. PROGNOSIS AND SUMMARY
|
| 864 |
-
|
| 865 |
-
A. Overall Communication Profile:
|
| 866 |
-
- Comprehensive summary of findings
|
| 867 |
-
- Developmental appropriateness assessment
|
| 868 |
-
- Functional communication impact
|
| 869 |
-
|
| 870 |
-
B. Treatment Planning:
|
| 871 |
-
- Priority intervention targets
|
| 872 |
-
- Expected outcomes and timeline
|
| 873 |
-
- Follow-up assessment recommendations
|
| 874 |
-
- Family/educational recommendations
|
| 875 |
-
|
| 876 |
CRITICAL REQUIREMENTS:
|
| 877 |
-
1. Complete ALL
|
| 878 |
2. Provide exact counts for all markers with specific examples
|
| 879 |
3. Calculate all percentages and rates with formulas shown
|
| 880 |
4. Include direct quotes from transcript for examples
|
| 881 |
-
5.
|
| 882 |
-
6.
|
| 883 |
7. If response is incomplete, end with <CONTINUE>
|
| 884 |
8. FORMATTING: Use NO asterisks (**), NO hashtags (###), NO bolding - plain text only
|
| 885 |
"""
|
|
@@ -1373,13 +1292,8 @@ def call_claude_api_with_continuation(prompt):
|
|
| 1373 |
"4. FIGURATIVE LANGUAGE ANALYSIS",
|
| 1374 |
"5. PRAGMATIC LANGUAGE ASSESSMENT",
|
| 1375 |
"6. VOCABULARY AND SEMANTIC ANALYSIS",
|
| 1376 |
-
"7.
|
| 1377 |
-
"8.
|
| 1378 |
-
"9. COGNITIVE-LINGUISTIC FACTORS",
|
| 1379 |
-
"10. FLUENCY AND RHYTHM ANALYSIS",
|
| 1380 |
-
"11. QUANTITATIVE METRICS",
|
| 1381 |
-
"12. CLINICAL IMPLICATIONS",
|
| 1382 |
-
"13. PROGNOSIS AND SUMMARY"
|
| 1383 |
]
|
| 1384 |
|
| 1385 |
# Safety limits to prevent infinite loops
|
|
@@ -1631,7 +1545,7 @@ def analyze_with_backup(annotated_transcript, original_transcript, age, gender,
|
|
| 1631 |
- Count [REPETITION] markers: Categorize by type (word, phrase, sound)
|
| 1632 |
- Count [REVISION] markers: Analyze self-correction patterns
|
| 1633 |
- Count [PAUSE] markers: Assess hesitation frequency
|
| 1634 |
-
- Calculate total disfluency rate
|
| 1635 |
|
| 1636 |
B. Word Retrieval Issues:
|
| 1637 |
- Count [CIRCUMLOCUTION] markers: List each roundabout description
|
|
@@ -1674,7 +1588,7 @@ def analyze_with_backup(annotated_transcript, original_transcript, age, gender,
|
|
| 1674 |
|
| 1675 |
C. Sentence Structure Analysis:
|
| 1676 |
- Use calculated MLU: {linguistic_metrics.get('mlu_words', 0)} words, {linguistic_metrics.get('mlu_morphemes', 0)} morphemes
|
| 1677 |
-
- Calculate complexity ratios
|
| 1678 |
|
| 1679 |
4. FIGURATIVE LANGUAGE ANALYSIS (with exact counts):
|
| 1680 |
|
|
@@ -1754,25 +1668,14 @@ def analyze_with_backup(annotated_transcript, original_transcript, age, gender,
|
|
| 1754 |
- Grammar error rate: Calculate from marker counts
|
| 1755 |
- Vocabulary sophistication ratio: {marker_analysis.get('category_totals', {}).get('vocab_sophistication_ratio', 0):.3f}
|
| 1756 |
|
| 1757 |
-
11. CLINICAL IMPLICATIONS:
|
| 1758 |
-
- Primary strengths: List with supporting evidence from markers and metrics
|
| 1759 |
-
- Primary weaknesses: Rank by severity with exact counts
|
| 1760 |
-
- Intervention priorities: Based on error frequency and impact
|
| 1761 |
-
- Therapy targets: Specific, measurable goals
|
| 1762 |
-
|
| 1763 |
-
12. PROGNOSIS AND SUMMARY:
|
| 1764 |
-
- Overall communication profile with percentile estimates
|
| 1765 |
-
- Developmental appropriateness assessment
|
| 1766 |
-
- Summary of key findings from quantitative analysis
|
| 1767 |
-
- Priority treatment goals and expected outcomes
|
| 1768 |
-
|
| 1769 |
CRITICAL REQUIREMENTS:
|
| 1770 |
- Use the provided calculated metrics in your analysis
|
| 1771 |
- Provide EXACT counts for every marker type
|
| 1772 |
- Calculate precise percentages and show your work
|
| 1773 |
- Give specific examples from the transcript
|
| 1774 |
- If annotation is incomplete, supplement with analysis of the original transcript
|
| 1775 |
-
- Complete ALL
|
|
|
|
| 1776 |
"""
|
| 1777 |
|
| 1778 |
return call_claude_api_with_continuation(analysis_prompt)
|
|
@@ -2148,20 +2051,17 @@ with gr.Blocks(title="Speech Analysis", theme=gr.themes.Soft()) as demo:
|
|
| 2148 |
|
| 2149 |
Provide a comprehensive clinical interpretation organized into these sections:
|
| 2150 |
|
| 2151 |
-
1. LEXICAL DIVERSITY
|
| 2152 |
-
-
|
| 2153 |
-
-
|
| 2154 |
-
- Clinical significance of diversity patterns
|
| 2155 |
|
| 2156 |
-
2. FLUENCY PATTERN
|
| 2157 |
-
-
|
| 2158 |
-
-
|
| 2159 |
-
- Impact on communication effectiveness
|
| 2160 |
|
| 2161 |
-
3. GRAMMATICAL
|
| 2162 |
-
-
|
| 2163 |
-
-
|
| 2164 |
-
- Areas of strength vs. weakness
|
| 2165 |
|
| 2166 |
4. VOCABULARY AND SEMANTIC ANALYSIS:
|
| 2167 |
- Interpretation of vocabulary sophistication measures
|
|
@@ -2178,15 +2078,9 @@ with gr.Blocks(title="Speech Analysis", theme=gr.themes.Soft()) as demo:
|
|
| 2178 |
- Strengths and areas of need
|
| 2179 |
- Functional communication impact
|
| 2180 |
|
| 2181 |
-
|
| 2182 |
-
- Specific intervention targets based on verified data
|
| 2183 |
-
- Therapy approaches and techniques
|
| 2184 |
-
- Progress monitoring suggestions
|
| 2185 |
-
- Prognosis and expected outcomes
|
| 2186 |
-
|
| 2187 |
-
Focus on INTERPRETATION and CLINICAL SIGNIFICANCE, not counting.
|
| 2188 |
All measurements are already verified and accurate.
|
| 2189 |
-
Cite specific examples from the transcript to support your
|
| 2190 |
"""
|
| 2191 |
|
| 2192 |
ai_interpretation = call_claude_api(verified_prompt)
|
|
@@ -2274,16 +2168,16 @@ with gr.Blocks(title="Speech Analysis", theme=gr.themes.Soft()) as demo:
|
|
| 2274 |
- Filler words: Use verified count of {marker_counts.get('FILLER', 0)} fillers
|
| 2275 |
* Calculate rate per 100 words: {marker_counts.get('FILLER', 0)/total_words*100:.2f}%
|
| 2276 |
* Identify types and provide examples from transcript
|
| 2277 |
-
*
|
| 2278 |
- False starts: Use verified count of {marker_counts.get('FALSE_START', 0)}
|
| 2279 |
* Provide specific examples from transcript
|
| 2280 |
* Analyze patterns and self-correction abilities
|
| 2281 |
- Repetitions: Use verified count of {marker_counts.get('REPETITION', 0)}
|
| 2282 |
* Categorize types (word, phrase, sound level)
|
| 2283 |
-
* Provide examples and
|
| 2284 |
- Total disfluency assessment: Use verified total of {category_totals['fluency_issues']}
|
| 2285 |
* Rate: {category_totals['fluency_issues']/total_words*100:.2f} per 100 words
|
| 2286 |
-
*
|
| 2287 |
|
| 2288 |
B. Word Retrieval Issues:
|
| 2289 |
- Circumlocutions: Count and analyze from transcript
|
|
@@ -2308,7 +2202,7 @@ with gr.Blocks(title="Speech Analysis", theme=gr.themes.Soft()) as demo:
|
|
| 2308 |
|
| 2309 |
B. Grammar and Morphology:
|
| 2310 |
- Error pattern analysis using verified counts
|
| 2311 |
-
-
|
| 2312 |
- Morphological complexity evaluation
|
| 2313 |
|
| 2314 |
3. COMPLEX SENTENCE ANALYSIS (use verified counts)
|
|
@@ -2335,7 +2229,7 @@ with gr.Blocks(title="Speech Analysis", theme=gr.themes.Soft()) as demo:
|
|
| 2335 |
- Tangential speech: Use verified count of {marker_counts.get('TANGENT', 0)}
|
| 2336 |
- Coherence breaks: Use verified count of {marker_counts.get('COHERENCE_BREAK', 0)}
|
| 2337 |
- Referential clarity: Use verified count of {marker_counts.get('PRONOUN_REF', 0)}
|
| 2338 |
-
- Overall conversational
|
| 2339 |
|
| 2340 |
6. VOCABULARY AND SEMANTIC ANALYSIS
|
| 2341 |
- Semantic errors: Use verified count of {marker_counts.get('SEMANTIC_ERROR', 0)}
|
|
@@ -2347,21 +2241,9 @@ with gr.Blocks(title="Speech Analysis", theme=gr.themes.Soft()) as demo:
|
|
| 2347 |
- Morphological complexity assessment
|
| 2348 |
- Derivational and inflectional morphology patterns
|
| 2349 |
- Error analysis using verified counts
|
| 2350 |
-
-
|
| 2351 |
-
|
| 2352 |
-
8. COGNITIVE-LINGUISTIC FACTORS
|
| 2353 |
-
- Working memory indicators from sentence complexity
|
| 2354 |
-
- Processing speed markers from fluency patterns
|
| 2355 |
-
- Executive function evidence from self-corrections
|
| 2356 |
-
- Attention and cognitive load management
|
| 2357 |
|
| 2358 |
-
|
| 2359 |
-
- Disfluency pattern analysis using verified counts
|
| 2360 |
-
- Speech rhythm and flow assessment
|
| 2361 |
-
- Natural vs. disrupted pause patterns
|
| 2362 |
-
- Overall fluency profile
|
| 2363 |
-
|
| 2364 |
-
10. QUANTITATIVE METRICS (use ALL verified data)
|
| 2365 |
- Total words: {total_words}
|
| 2366 |
- Total sentences: {linguistic_metrics.get('total_sentences', 0)}
|
| 2367 |
- Unique words: {linguistic_metrics.get('unique_words', 0)}
|
|
@@ -2369,25 +2251,7 @@ with gr.Blocks(title="Speech Analysis", theme=gr.themes.Soft()) as demo:
|
|
| 2369 |
- MLU morphemes: {linguistic_metrics.get('mlu_morphemes', 0):.2f}
|
| 2370 |
- All error rates and ratios from verified counts
|
| 2371 |
|
| 2372 |
-
|
| 2373 |
-
A. Strengths (with supporting evidence):
|
| 2374 |
-
- Identify primary strengths using verified data
|
| 2375 |
-
- Provide specific examples from transcript
|
| 2376 |
-
|
| 2377 |
-
B. Areas of Need (prioritized by severity):
|
| 2378 |
-
- Primary concerns based on verified counts and rates
|
| 2379 |
-
- Secondary areas for intervention
|
| 2380 |
-
|
| 2381 |
-
C. Treatment Recommendations:
|
| 2382 |
-
- Specific, measurable therapy goals
|
| 2383 |
-
- Evidence-based intervention approaches
|
| 2384 |
-
- Progress monitoring strategies
|
| 2385 |
-
|
| 2386 |
-
12. PROGNOSIS AND SUMMARY
|
| 2387 |
-
- Overall communication profile synthesis
|
| 2388 |
-
- Functional impact assessment
|
| 2389 |
-
- Treatment planning and expected outcomes
|
| 2390 |
-
- Follow-up recommendations
|
| 2391 |
|
| 2392 |
CRITICAL: Complete ALL 13 sections using verified data and specific transcript examples.
|
| 2393 |
"""
|
|
|
|
| 118 |
"4. FIGURATIVE LANGUAGE ANALYSIS",
|
| 119 |
"5. PRAGMATIC LANGUAGE ASSESSMENT",
|
| 120 |
"6. VOCABULARY AND SEMANTIC ANALYSIS",
|
| 121 |
+
"7. MORPHOLOGICAL AND PHONOLOGICAL ANALYSIS",
|
| 122 |
+
"8. QUANTITATIVE METRICS AND NLP FEATURES"
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 123 |
]
|
| 124 |
|
| 125 |
combined_parts = []
|
|
|
|
| 200 |
|
| 201 |
prompt = f"""
|
| 202 |
You are a speech-language pathologist answering a specific question about a speech sample.
|
|
|
|
|
|
|
| 203 |
|
| 204 |
TRANSCRIPT:
|
| 205 |
{transcript_content}{notes_section}
|
|
|
|
| 210 |
- Provide a focused, detailed answer to the specific question asked
|
| 211 |
- Include specific examples from the transcript with exact quotes
|
| 212 |
- Provide quantitative data when relevant (counts, percentages, rates)
|
| 213 |
+
- Provide objective data interpretation only
|
| 214 |
- Keep the response focused on the question but thorough in analysis
|
| 215 |
- If the question relates to multiple areas, address all relevant aspects
|
| 216 |
|
|
|
|
| 256 |
- Note any fluency-enhancing contexts
|
| 257 |
- Assess overall speech rhythm and flow
|
| 258 |
|
| 259 |
+
4. OBJECTIVE SUMMARY:
|
| 260 |
+
- Provide data summary only
|
| 261 |
+
- List observed patterns
|
|
|
|
| 262 |
""",
|
| 263 |
|
| 264 |
"Grammar and Syntax": """
|
|
|
|
| 280 |
- Count subject-verb agreement errors
|
| 281 |
- Assess auxiliary verb usage
|
| 282 |
|
| 283 |
+
4. OBJECTIVE SUMMARY:
|
| 284 |
+
- List primary grammatical patterns observed
|
| 285 |
+
- Provide data summary only
|
|
|
|
| 286 |
""",
|
| 287 |
|
| 288 |
"Vocabulary and Semantics": """
|
|
|
|
| 308 |
- Identify compensatory strategies
|
| 309 |
- Assess overall lexical access efficiency
|
| 310 |
|
| 311 |
+
5. OBJECTIVE SUMMARY:
|
| 312 |
+
- List vocabulary patterns observed
|
| 313 |
+
- Provide data summary only
|
| 314 |
""",
|
| 315 |
|
| 316 |
"Pragmatics and Discourse": """
|
|
|
|
| 336 |
- Assess contextual appropriateness
|
| 337 |
- Evaluate social awareness in communication
|
| 338 |
|
| 339 |
+
5. OBJECTIVE SUMMARY:
|
| 340 |
+
- List pragmatic patterns observed
|
| 341 |
+
- Provide data summary only
|
| 342 |
""",
|
| 343 |
|
| 344 |
"Sentence Complexity": """
|
|
|
|
| 361 |
|
| 362 |
4. SYNTACTIC MATURITY:
|
| 363 |
- Calculate Mean Length of Utterance (MLU)
|
| 364 |
+
- List syntactic patterns observed
|
|
|
|
| 365 |
|
| 366 |
+
5. OBJECTIVE SUMMARY:
|
| 367 |
+
- Provide complexity data summary
|
| 368 |
+
- List observed patterns only
|
| 369 |
""",
|
| 370 |
|
| 371 |
"Word Finding and Retrieval": """
|
|
|
|
| 391 |
- Assess impact of topic familiarity
|
| 392 |
- Evaluate effect of linguistic complexity on retrieval
|
| 393 |
|
| 394 |
+
5. OBJECTIVE SUMMARY:
|
| 395 |
+
- List word-finding patterns observed
|
| 396 |
+
- Provide data summary only
|
|
|
|
| 397 |
"""
|
| 398 |
}
|
| 399 |
|
|
|
|
| 402 |
|
| 403 |
prompt = f"""
|
| 404 |
You are a speech-language pathologist conducting a targeted analysis of a specific area.
|
|
|
|
|
|
|
| 405 |
|
| 406 |
TRANSCRIPT:
|
| 407 |
{transcript_content}{notes_section}
|
|
|
|
| 413 |
INSTRUCTIONS:
|
| 414 |
- Provide specific examples with exact quotes from the transcript
|
| 415 |
- Include quantitative data (counts, percentages, rates per 100 words)
|
| 416 |
+
- Provide objective data interpretation only
|
| 417 |
+
- Focus on measurable observations
|
| 418 |
- Be thorough but focused on the specified area
|
| 419 |
|
| 420 |
Conduct the targeted analysis:
|
|
|
|
| 474 |
|
| 475 |
annotation_prompt = f"""
|
| 476 |
You are a speech-language pathologist preparing a transcript for detailed analysis. Your task is to ANNOTATE the ENTIRE transcript with linguistic markers at a WORD-BY-WORD level.
|
|
|
|
|
|
|
| 477 |
|
| 478 |
ORIGINAL TRANSCRIPT:
|
| 479 |
{transcript_content}{notes_section}
|
|
|
|
| 590 |
"""
|
| 591 |
|
| 592 |
analysis_prompt = f"""
|
| 593 |
+
You are a speech-language pathologist conducting a comprehensive analysis of an annotated speech sample. Provide objective data analysis without clinical interpretations.
|
| 594 |
|
|
|
|
|
|
|
| 595 |
ANNOTATED TRANSCRIPT:
|
| 596 |
{annotated_transcript}{notes_section}
|
| 597 |
|
| 598 |
+
INSTRUCTIONS: Complete ALL 8 sections below. Use simple formatting with NO BOLDING (no ** or asterisks), NO hashtags (###), and minimal markdown. Focus on objective data only. Count all markers precisely and provide specific examples. Write section headers as plain text followed by a colon. DO NOT include age/gender comparisons, clinical interpretations, severity assessments, or treatment recommendations.
|
| 599 |
|
| 600 |
COMPREHENSIVE SPEECH SAMPLE ANALYSIS
|
| 601 |
|
|
|
|
| 620 |
* Semantic revisions: "car- I mean bike"
|
| 621 |
- Pauses ([PAUSE]): Count hesitation markers and silent pauses
|
| 622 |
- Total disfluency rate: Calculate combined rate per 100 words
|
|
|
|
| 623 |
|
| 624 |
B. Word Retrieval Issues (detailed analysis):
|
| 625 |
- Circumlocutions ([CIRCUMLOCUTION]): Count and analyze strategies
|
|
|
|
| 691 |
- Clauses per utterance ratio
|
| 692 |
- Subordination index
|
| 693 |
- Coordination index
|
|
|
|
| 694 |
|
| 695 |
4. FIGURATIVE LANGUAGE ANALYSIS
|
| 696 |
|
|
|
|
| 699 |
* Metaphors: "Time is money"
|
| 700 |
* Similes: "Fast as lightning"
|
| 701 |
* Idioms: "Raining cats and dogs"
|
| 702 |
+
- Appropriateness assessment: Context only
|
| 703 |
- Comprehension vs. production abilities
|
| 704 |
- Abstract language development indicators
|
| 705 |
|
|
|
|
| 735 |
- Vocabulary breadth: Range of semantic categories
|
| 736 |
- Vocabulary depth: Precision and nuance within categories
|
| 737 |
- Academic vs. conversational vocabulary ratio
|
| 738 |
+
- Vocabulary development patterns observed
|
| 739 |
|
| 740 |
7. NLP-DERIVED LINGUISTIC FEATURES (use bullet lists, NO tables)
|
| 741 |
|
|
|
|
| 750 |
* Provide exact MTLD score and interpretation
|
| 751 |
- Hypergeometric Distribution D (HDD): Probability-based diversity measure
|
| 752 |
* Controls for text length effects
|
| 753 |
+
* Provide HDD score
|
| 754 |
|
| 755 |
B. Word Frequency Analysis (as bullet list, not table):
|
| 756 |
- Most frequent words used: List top 10 as "word (count)" format
|
| 757 |
- High-frequency vs. low-frequency word distribution
|
| 758 |
- Function words vs. content words ratio
|
| 759 |
+
- Repetitive word patterns observed
|
| 760 |
|
| 761 |
C. Linguistic Complexity Indicators (bullet format):
|
| 762 |
- Average word length in syllables
|
|
|
|
| 770 |
- Derivational morphology: Prefixes and suffixes
|
| 771 |
- Inflectional morphology: Tense, number, case markers
|
| 772 |
- Morphological awareness indicators
|
| 773 |
+
- Error patterns observed
|
| 774 |
|
| 775 |
B. Phonological Considerations:
|
| 776 |
- Sound pattern analysis (if evident in transcript)
|
|
|
|
| 792 |
C. Executive Function Evidence:
|
| 793 |
- Self-monitoring and error correction
|
| 794 |
- Planning and organization in discourse
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 795 |
CRITICAL REQUIREMENTS:
|
| 796 |
+
1. Complete ALL 8 sections - do not stop early
|
| 797 |
2. Provide exact counts for all markers with specific examples
|
| 798 |
3. Calculate all percentages and rates with formulas shown
|
| 799 |
4. Include direct quotes from transcript for examples
|
| 800 |
+
5. Focus on objective data only - NO clinical interpretations or age/gender comparisons
|
| 801 |
+
6. NO treatment recommendations or clinical implications
|
| 802 |
7. If response is incomplete, end with <CONTINUE>
|
| 803 |
8. FORMATTING: Use NO asterisks (**), NO hashtags (###), NO bolding - plain text only
|
| 804 |
"""
|
|
|
|
| 1292 |
"4. FIGURATIVE LANGUAGE ANALYSIS",
|
| 1293 |
"5. PRAGMATIC LANGUAGE ASSESSMENT",
|
| 1294 |
"6. VOCABULARY AND SEMANTIC ANALYSIS",
|
| 1295 |
+
"7. MORPHOLOGICAL AND PHONOLOGICAL ANALYSIS",
|
| 1296 |
+
"8. QUANTITATIVE METRICS AND NLP FEATURES"
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1297 |
]
|
| 1298 |
|
| 1299 |
# Safety limits to prevent infinite loops
|
|
|
|
| 1545 |
- Count [REPETITION] markers: Categorize by type (word, phrase, sound)
|
| 1546 |
- Count [REVISION] markers: Analyze self-correction patterns
|
| 1547 |
- Count [PAUSE] markers: Assess hesitation frequency
|
| 1548 |
+
- Calculate total disfluency rate
|
| 1549 |
|
| 1550 |
B. Word Retrieval Issues:
|
| 1551 |
- Count [CIRCUMLOCUTION] markers: List each roundabout description
|
|
|
|
| 1588 |
|
| 1589 |
C. Sentence Structure Analysis:
|
| 1590 |
- Use calculated MLU: {linguistic_metrics.get('mlu_words', 0)} words, {linguistic_metrics.get('mlu_morphemes', 0)} morphemes
|
| 1591 |
+
- Calculate complexity ratios
|
| 1592 |
|
| 1593 |
4. FIGURATIVE LANGUAGE ANALYSIS (with exact counts):
|
| 1594 |
|
|
|
|
| 1668 |
- Grammar error rate: Calculate from marker counts
|
| 1669 |
- Vocabulary sophistication ratio: {marker_analysis.get('category_totals', {}).get('vocab_sophistication_ratio', 0):.3f}
|
| 1670 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1671 |
CRITICAL REQUIREMENTS:
|
| 1672 |
- Use the provided calculated metrics in your analysis
|
| 1673 |
- Provide EXACT counts for every marker type
|
| 1674 |
- Calculate precise percentages and show your work
|
| 1675 |
- Give specific examples from the transcript
|
| 1676 |
- If annotation is incomplete, supplement with analysis of the original transcript
|
| 1677 |
+
- Complete ALL 8 sections - use <CONTINUE> if needed
|
| 1678 |
+
- Focus on objective data only - NO clinical interpretations
|
| 1679 |
"""
|
| 1680 |
|
| 1681 |
return call_claude_api_with_continuation(analysis_prompt)
|
|
|
|
| 2051 |
|
| 2052 |
Provide a comprehensive, objective data interpretation organized into these sections:
|
| 2053 |
|
| 2054 |
+
1. LEXICAL DIVERSITY DATA:
|
| 2055 |
+
- Report the advanced lexical diversity measures (MTLD, HDD, MATTR, etc.)
|
| 2056 |
+
- Provide objective data interpretation only
|
|
|
|
| 2057 |
|
| 2058 |
+
2. FLUENCY PATTERN DATA:
|
| 2059 |
+
- Report fluency marker counts and rates
|
| 2060 |
+
- Provide objective data summary only
|
|
|
|
| 2061 |
|
| 2062 |
+
3. GRAMMATICAL PATTERN DATA:
|
| 2063 |
+
- Report grammar error patterns from verified counts
|
| 2064 |
+
- Provide objective data summary only
|
|
|
|
| 2065 |
|
| 2066 |
4. VOCABULARY AND SEMANTIC ANALYSIS:
|
| 2067 |
- Interpretation of vocabulary sophistication measures
|
|
|
|
| 2078 |
- Strengths and areas of need
|
| 2079 |
- Functional communication impact
|
| 2080 |
|
| 2081 |
+
Focus on OBJECTIVE DATA INTERPRETATION only, not clinical significance.
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 2082 |
All measurements are already verified and accurate.
|
| 2083 |
+
Cite specific examples from the transcript to support your observations.
|
| 2084 |
"""
|
| 2085 |
|
| 2086 |
ai_interpretation = call_claude_api(verified_prompt)
|
|
|
|
| 2168 |
- Filler words: Use verified count of {marker_counts.get('FILLER', 0)} fillers
|
| 2169 |
* Calculate rate per 100 words: {marker_counts.get('FILLER', 0)/total_words*100:.2f}%
|
| 2170 |
* Identify types and provide examples from transcript
|
| 2171 |
+
* Provide objective count summary
|
| 2172 |
- False starts: Use verified count of {marker_counts.get('FALSE_START', 0)}
|
| 2173 |
* Provide specific examples from transcript
|
| 2174 |
* Analyze patterns and self-correction abilities
|
| 2175 |
- Repetitions: Use verified count of {marker_counts.get('REPETITION', 0)}
|
| 2176 |
* Categorize types (word, phrase, sound level)
|
| 2177 |
+
* Provide examples and count summary
|
| 2178 |
- Total disfluency assessment: Use verified total of {category_totals['fluency_issues']}
|
| 2179 |
* Rate: {category_totals['fluency_issues']/total_words*100:.2f} per 100 words
|
| 2180 |
+
* Provide objective rate calculation
|
| 2181 |
|
| 2182 |
B. Word Retrieval Issues:
|
| 2183 |
- Circumlocutions: Count and analyze from transcript
|
|
|
|
| 2202 |
|
| 2203 |
B. Grammar and Morphology:
|
| 2204 |
- Error pattern analysis using verified counts
|
| 2205 |
+
- Pattern analysis only
|
| 2206 |
- Morphological complexity evaluation
|
| 2207 |
|
| 2208 |
3. COMPLEX SENTENCE ANALYSIS (use verified counts)
|
|
|
|
| 2229 |
- Tangential speech: Use verified count of {marker_counts.get('TANGENT', 0)}
|
| 2230 |
- Coherence breaks: Use verified count of {marker_counts.get('COHERENCE_BREAK', 0)}
|
| 2231 |
- Referential clarity: Use verified count of {marker_counts.get('PRONOUN_REF', 0)}
|
| 2232 |
+
- Overall conversational patterns observed
|
| 2233 |
|
| 2234 |
6. VOCABULARY AND SEMANTIC ANALYSIS
|
| 2235 |
- Semantic errors: Use verified count of {marker_counts.get('SEMANTIC_ERROR', 0)}
|
|
|
|
| 2241 |
- Morphological complexity assessment
|
| 2242 |
- Derivational and inflectional morphology patterns
|
| 2243 |
- Error analysis using verified counts
|
| 2244 |
+
- Pattern analysis only
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 2245 |
|
| 2246 |
+
8. QUANTITATIVE METRICS AND NLP FEATURES (use ALL verified data)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 2247 |
- Total words: {total_words}
|
| 2248 |
- Total sentences: {linguistic_metrics.get('total_sentences', 0)}
|
| 2249 |
- Unique words: {linguistic_metrics.get('unique_words', 0)}
|
|
|
|
| 2251 |
- MLU morphemes: {linguistic_metrics.get('mlu_morphemes', 0):.2f}
|
| 2252 |
- All error rates and ratios from verified counts
|
| 2253 |
|
| 2254 |
+
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 2255 |
|
| 2256 |
CRITICAL: Complete ALL 8 sections using verified data and specific transcript examples.
|
| 2257 |
"""
|