SreekarB committed on
Commit
309ccf7
·
verified ·
1 Parent(s): 503556f

Update annotated_casl_app.py

Browse files
Files changed (1) hide show
  1. annotated_casl_app.py +59 -195
annotated_casl_app.py CHANGED
@@ -118,13 +118,8 @@ def combine_sections_smartly(sections_dict):
118
  "4. FIGURATIVE LANGUAGE ANALYSIS",
119
  "5. PRAGMATIC LANGUAGE ASSESSMENT",
120
  "6. VOCABULARY AND SEMANTIC ANALYSIS",
121
- "7. NLP-DERIVED LINGUISTIC FEATURES",
122
- "8. MORPHOLOGICAL AND PHONOLOGICAL ANALYSIS",
123
- "9. COGNITIVE-LINGUISTIC FACTORS",
124
- "10. FLUENCY AND RHYTHM ANALYSIS",
125
- "11. QUANTITATIVE METRICS",
126
- "12. CLINICAL IMPLICATIONS",
127
- "13. PROGNOSIS AND SUMMARY"
128
  ]
129
 
130
  combined_parts = []
@@ -205,8 +200,6 @@ def answer_quick_question(transcript_content, question, age, gender, slp_notes):
205
 
206
  prompt = f"""
207
  You are a speech-language pathologist answering a specific question about a speech sample.
208
-
209
- Patient: {age}-year-old {gender}
210
 
211
  TRANSCRIPT:
212
  {transcript_content}{notes_section}
@@ -217,7 +210,7 @@ def answer_quick_question(transcript_content, question, age, gender, slp_notes):
217
  - Provide a focused, detailed answer to the specific question asked
218
  - Include specific examples from the transcript with exact quotes
219
  - Provide quantitative data when relevant (counts, percentages, rates)
220
- - Give clinical interpretation and significance
221
  - Keep the response focused on the question but thorough in analysis
222
  - If the question relates to multiple areas, address all relevant aspects
223
 
@@ -263,10 +256,9 @@ def analyze_targeted_area(transcript_content, analysis_area, age, gender, slp_no
263
  - Note any fluency-enhancing contexts
264
  - Assess overall speech rhythm and flow
265
 
266
- 4. CLINICAL SIGNIFICANCE:
267
- - Compare to age-appropriate norms
268
- - Assess severity level
269
- - Recommend intervention priorities
270
  """,
271
 
272
  "Grammar and Syntax": """
@@ -288,10 +280,9 @@ def analyze_targeted_area(transcript_content, analysis_area, age, gender, slp_no
288
  - Count subject-verb agreement errors
289
  - Assess auxiliary verb usage
290
 
291
- 4. CLINICAL IMPLICATIONS:
292
- - Identify primary grammatical targets for intervention
293
- - Assess developmental appropriateness
294
- - Recommend specific therapy goals
295
  """,
296
 
297
  "Vocabulary and Semantics": """
@@ -317,9 +308,9 @@ def analyze_targeted_area(transcript_content, analysis_area, age, gender, slp_no
317
  - Identify compensatory strategies
318
  - Assess overall lexical access efficiency
319
 
320
- 5. CLINICAL RECOMMENDATIONS:
321
- - Identify vocabulary intervention targets
322
- - Recommend strategies for word-finding support
323
  """,
324
 
325
  "Pragmatics and Discourse": """
@@ -345,9 +336,9 @@ def analyze_targeted_area(transcript_content, analysis_area, age, gender, slp_no
345
  - Assess contextual appropriateness
346
  - Evaluate social awareness in communication
347
 
348
- 5. CLINICAL IMPLICATIONS:
349
- - Identify pragmatic intervention priorities
350
- - Recommend social communication goals
351
  """,
352
 
353
  "Sentence Complexity": """
@@ -370,12 +361,11 @@ def analyze_targeted_area(transcript_content, analysis_area, age, gender, slp_no
370
 
371
  4. SYNTACTIC MATURITY:
372
  - Calculate Mean Length of Utterance (MLU)
373
- - Assess developmental appropriateness
374
- - Identify areas for syntactic growth
375
 
376
- 5. CLINICAL RECOMMENDATIONS:
377
- - Identify targets for increasing complexity
378
- - Recommend scaffolding strategies
379
  """,
380
 
381
  "Word Finding and Retrieval": """
@@ -401,10 +391,9 @@ def analyze_targeted_area(transcript_content, analysis_area, age, gender, slp_no
401
  - Assess impact of topic familiarity
402
  - Evaluate effect of linguistic complexity on retrieval
403
 
404
- 5. CLINICAL IMPLICATIONS:
405
- - Recommend word-finding intervention strategies
406
- - Identify cueing hierarchies to trial
407
- - Suggest compensatory strategy training
408
  """
409
  }
410
 
@@ -413,8 +402,6 @@ def analyze_targeted_area(transcript_content, analysis_area, age, gender, slp_no
413
 
414
  prompt = f"""
415
  You are a speech-language pathologist conducting a targeted analysis of a specific area.
416
-
417
- Patient: {age}-year-old {gender}
418
 
419
  TRANSCRIPT:
420
  {transcript_content}{notes_section}
@@ -426,8 +413,8 @@ def analyze_targeted_area(transcript_content, analysis_area, age, gender, slp_no
426
  INSTRUCTIONS:
427
  - Provide specific examples with exact quotes from the transcript
428
  - Include quantitative data (counts, percentages, rates per 100 words)
429
- - Give detailed clinical interpretation
430
- - Provide specific, measurable recommendations
431
  - Be thorough but focused on the specified area
432
 
433
  Conduct the targeted analysis:
@@ -487,8 +474,6 @@ def annotate_transcript(transcript_content, age, gender, slp_notes):
487
 
488
  annotation_prompt = f"""
489
  You are a speech-language pathologist preparing a transcript for detailed analysis. Your task is to ANNOTATE the ENTIRE transcript with linguistic markers at a WORD-BY-WORD level.
490
-
491
- Patient: {age}-year-old {gender}
492
 
493
  ORIGINAL TRANSCRIPT:
494
  {transcript_content}{notes_section}
@@ -605,14 +590,12 @@ def analyze_annotated_transcript(annotated_transcript, age, gender, slp_notes):
605
  """
606
 
607
  analysis_prompt = f"""
608
- You are a speech-language pathologist conducting a comprehensive analysis of an annotated speech sample. Provide a complete, clinically useful analysis without excessive formatting.
609
 
610
- Patient: {age}-year-old {gender}
611
-
612
  ANNOTATED TRANSCRIPT:
613
  {annotated_transcript}{notes_section}
614
 
615
- INSTRUCTIONS: Complete ALL 13 sections below. Use simple formatting with NO BOLDING (no ** or asterisks), NO hashtags (###), and minimal markdown. Focus on clinical utility and completeness. Count all markers precisely and provide specific examples. Write section headers as plain text followed by a colon.
616
 
617
  COMPREHENSIVE SPEECH SAMPLE ANALYSIS
618
 
@@ -637,7 +620,6 @@ def analyze_annotated_transcript(annotated_transcript, age, gender, slp_notes):
637
  * Semantic revisions: "car- I mean bike"
638
  - Pauses ([PAUSE]): Count hesitation markers and silent pauses
639
  - Total disfluency rate: Calculate combined rate per 100 words
640
- - Severity assessment: Compare to age norms
641
 
642
  B. Word Retrieval Issues (detailed analysis):
643
  - Circumlocutions ([CIRCUMLOCUTION]): Count and analyze strategies
@@ -709,7 +691,6 @@ def analyze_annotated_transcript(annotated_transcript, age, gender, slp_notes):
709
  - Clauses per utterance ratio
710
  - Subordination index
711
  - Coordination index
712
- - Developmental appropriateness assessment
713
 
714
  4. FIGURATIVE LANGUAGE ANALYSIS
715
 
@@ -718,7 +699,7 @@ def analyze_annotated_transcript(annotated_transcript, age, gender, slp_notes):
718
  * Metaphors: "Time is money"
719
  * Similes: "Fast as lightning"
720
  * Idioms: "Raining cats and dogs"
721
- - Appropriateness assessment: Context and age-level
722
  - Comprehension vs. production abilities
723
  - Abstract language development indicators
724
 
@@ -754,7 +735,7 @@ def analyze_annotated_transcript(annotated_transcript, age, gender, slp_notes):
754
  - Vocabulary breadth: Range of semantic categories
755
  - Vocabulary depth: Precision and nuance within categories
756
  - Academic vs. conversational vocabulary ratio
757
- - Age-appropriate vocabulary development
758
 
759
  7. NLP-DERIVED LINGUISTIC FEATURES (use bullet lists, NO tables)
760
 
@@ -769,13 +750,13 @@ def analyze_annotated_transcript(annotated_transcript, age, gender, slp_notes):
769
  * Provide exact MTLD score and interpretation
770
  - Hypergeometric Distribution D (HDD): Probability-based diversity measure
771
  * Controls for text length effects
772
- * Provide HDD score and clinical significance
773
 
774
  B. Word Frequency Analysis (as bullet list, not table):
775
  - Most frequent words used: List top 10 as "word (count)" format
776
  - High-frequency vs. low-frequency word distribution
777
  - Function words vs. content words ratio
778
- - Repetitive word patterns and their clinical significance
779
 
780
  C. Linguistic Complexity Indicators (bullet format):
781
  - Average word length in syllables
@@ -789,7 +770,7 @@ def analyze_annotated_transcript(annotated_transcript, age, gender, slp_notes):
789
  - Derivational morphology: Prefixes and suffixes
790
  - Inflectional morphology: Tense, number, case markers
791
  - Morphological awareness indicators
792
- - Error patterns and developmental appropriateness
793
 
794
  B. Phonological Considerations:
795
  - Sound pattern analysis (if evident in transcript)
@@ -811,75 +792,13 @@ def analyze_annotated_transcript(annotated_transcript, age, gender, slp_notes):
811
  C. Executive Function Evidence:
812
  - Self-monitoring and error correction
813
  - Planning and organization in discourse
814
- - Cognitive flexibility in topic management
815
-
816
- 10. FLUENCY AND RHYTHM ANALYSIS
817
-
818
- A. Disfluency Patterns:
819
- - Total disfluency count and rate per 100 words
820
- - Disfluency type distribution
821
- - Clustering patterns and severity assessment
822
- - Impact on communication effectiveness
823
-
824
- B. Speech Flow and Rhythm:
825
- - Natural pause patterns vs. disrupted flow
826
- - Rhythm and prosodic patterns (if evident)
827
- - Overall fluency profile and age-appropriateness
828
-
829
- 11. QUANTITATIVE METRICS
830
-
831
- A. Basic Measures:
832
- - Total words: [exact count]
833
- - Total sentences: [exact count]
834
- - Unique words: [exact count]
835
- - MLU words: [calculation with formula shown]
836
- - MLU morphemes: [calculation with formula shown]
837
- - Type-Token Ratio: [calculation and interpretation]
838
-
839
- B. Error Rates and Ratios:
840
- - Disfluency rate per 100 words
841
- - Grammatical accuracy percentage
842
- - Vocabulary sophistication ratio
843
- - Sentence complexity distribution percentages
844
-
845
- 12. CLINICAL IMPLICATIONS
846
-
847
- A. Strengths (ranked by prominence):
848
- - Primary strengths with supporting evidence
849
- - Secondary strengths with examples
850
- - Compensatory strategies observed
851
-
852
- B. Areas of Need (prioritized by severity):
853
- - Primary concerns with impact assessment
854
- - Secondary concerns with supporting data
855
- - Developmental vs. disorder considerations
856
-
857
- C. Treatment Recommendations:
858
- - Specific, measurable therapy goals
859
- - Intervention approaches and techniques
860
- - Frequency and duration recommendations
861
- - Progress monitoring strategies
862
-
863
- 13. PROGNOSIS AND SUMMARY
864
-
865
- A. Overall Communication Profile:
866
- - Comprehensive summary of findings
867
- - Developmental appropriateness assessment
868
- - Functional communication impact
869
-
870
- B. Treatment Planning:
871
- - Priority intervention targets
872
- - Expected outcomes and timeline
873
- - Follow-up assessment recommendations
874
- - Family/educational recommendations
875
-
876
  CRITICAL REQUIREMENTS:
877
- 1. Complete ALL 13 sections - do not stop early
878
  2. Provide exact counts for all markers with specific examples
879
  3. Calculate all percentages and rates with formulas shown
880
  4. Include direct quotes from transcript for examples
881
- 5. Analyze patterns and provide clinical interpretations
882
- 6. Focus on actionable, clinically relevant insights
883
  7. If response is incomplete, end with <CONTINUE>
884
  8. FORMATTING: Use NO asterisks (**), NO hashtags (###), NO bolding - plain text only
885
  """
@@ -1373,13 +1292,8 @@ def call_claude_api_with_continuation(prompt):
1373
  "4. FIGURATIVE LANGUAGE ANALYSIS",
1374
  "5. PRAGMATIC LANGUAGE ASSESSMENT",
1375
  "6. VOCABULARY AND SEMANTIC ANALYSIS",
1376
- "7. NLP-DERIVED LINGUISTIC FEATURES",
1377
- "8. MORPHOLOGICAL AND PHONOLOGICAL ANALYSIS",
1378
- "9. COGNITIVE-LINGUISTIC FACTORS",
1379
- "10. FLUENCY AND RHYTHM ANALYSIS",
1380
- "11. QUANTITATIVE METRICS",
1381
- "12. CLINICAL IMPLICATIONS",
1382
- "13. PROGNOSIS AND SUMMARY"
1383
  ]
1384
 
1385
  # Safety limits to prevent infinite loops
@@ -1631,7 +1545,7 @@ def analyze_with_backup(annotated_transcript, original_transcript, age, gender,
1631
  - Count [REPETITION] markers: Categorize by type (word, phrase, sound)
1632
  - Count [REVISION] markers: Analyze self-correction patterns
1633
  - Count [PAUSE] markers: Assess hesitation frequency
1634
- - Calculate total disfluency rate and severity level
1635
 
1636
  B. Word Retrieval Issues:
1637
  - Count [CIRCUMLOCUTION] markers: List each roundabout description
@@ -1674,7 +1588,7 @@ def analyze_with_backup(annotated_transcript, original_transcript, age, gender,
1674
 
1675
  C. Sentence Structure Analysis:
1676
  - Use calculated MLU: {linguistic_metrics.get('mlu_words', 0)} words, {linguistic_metrics.get('mlu_morphemes', 0)} morphemes
1677
- - Calculate complexity ratios and assess developmental appropriateness
1678
 
1679
  4. FIGURATIVE LANGUAGE ANALYSIS (with exact counts):
1680
 
@@ -1754,25 +1668,14 @@ def analyze_with_backup(annotated_transcript, original_transcript, age, gender,
1754
  - Grammar error rate: Calculate from marker counts
1755
  - Vocabulary sophistication ratio: {marker_analysis.get('category_totals', {}).get('vocab_sophistication_ratio', 0):.3f}
1756
 
1757
- 11. CLINICAL IMPLICATIONS:
1758
- - Primary strengths: List with supporting evidence from markers and metrics
1759
- - Primary weaknesses: Rank by severity with exact counts
1760
- - Intervention priorities: Based on error frequency and impact
1761
- - Therapy targets: Specific, measurable goals
1762
-
1763
- 12. PROGNOSIS AND SUMMARY:
1764
- - Overall communication profile with percentile estimates
1765
- - Developmental appropriateness assessment
1766
- - Summary of key findings from quantitative analysis
1767
- - Priority treatment goals and expected outcomes
1768
-
1769
  CRITICAL REQUIREMENTS:
1770
  - Use the provided calculated metrics in your analysis
1771
  - Provide EXACT counts for every marker type
1772
  - Calculate precise percentages and show your work
1773
  - Give specific examples from the transcript
1774
  - If annotation is incomplete, supplement with analysis of the original transcript
1775
- - Complete ALL 13 sections - use <CONTINUE> if needed
 
1776
  """
1777
 
1778
  return call_claude_api_with_continuation(analysis_prompt)
@@ -2148,20 +2051,17 @@ with gr.Blocks(title="Speech Analysis", theme=gr.themes.Soft()) as demo:
2148
 
2149
  Provide a comprehensive clinical interpretation organized into these sections:
2150
 
2151
- 1. LEXICAL DIVERSITY INTERPRETATION:
2152
- - Interpret the advanced lexical diversity measures (MTLD, HDD, MATTR, etc.)
2153
- - Compare to age-appropriate norms
2154
- - Clinical significance of diversity patterns
2155
 
2156
- 2. FLUENCY PATTERN ANALYSIS:
2157
- - Clinical interpretation of fluency marker counts and rates
2158
- - Severity assessment based on verified counts
2159
- - Impact on communication effectiveness
2160
 
2161
- 3. GRAMMATICAL COMPETENCE ASSESSMENT:
2162
- - Analysis of grammar error patterns from verified counts
2163
- - Developmental appropriateness
2164
- - Areas of strength vs. weakness
2165
 
2166
  4. VOCABULARY AND SEMANTIC ANALYSIS:
2167
  - Interpretation of vocabulary sophistication measures
@@ -2178,15 +2078,9 @@ with gr.Blocks(title="Speech Analysis", theme=gr.themes.Soft()) as demo:
2178
  - Strengths and areas of need
2179
  - Functional communication impact
2180
 
2181
- 7. CLINICAL RECOMMENDATIONS:
2182
- - Specific intervention targets based on verified data
2183
- - Therapy approaches and techniques
2184
- - Progress monitoring suggestions
2185
- - Prognosis and expected outcomes
2186
-
2187
- Focus on INTERPRETATION and CLINICAL SIGNIFICANCE, not counting.
2188
  All measurements are already verified and accurate.
2189
- Cite specific examples from the transcript to support your interpretations.
2190
  """
2191
 
2192
  ai_interpretation = call_claude_api(verified_prompt)
@@ -2274,16 +2168,16 @@ with gr.Blocks(title="Speech Analysis", theme=gr.themes.Soft()) as demo:
2274
  - Filler words: Use verified count of {marker_counts.get('FILLER', 0)} fillers
2275
  * Calculate rate per 100 words: {marker_counts.get('FILLER', 0)/total_words*100:.2f}%
2276
  * Identify types and provide examples from transcript
2277
- * Assess severity and impact on communication
2278
  - False starts: Use verified count of {marker_counts.get('FALSE_START', 0)}
2279
  * Provide specific examples from transcript
2280
  * Analyze patterns and self-correction abilities
2281
  - Repetitions: Use verified count of {marker_counts.get('REPETITION', 0)}
2282
  * Categorize types (word, phrase, sound level)
2283
- * Provide examples and assess severity
2284
  - Total disfluency assessment: Use verified total of {category_totals['fluency_issues']}
2285
  * Rate: {category_totals['fluency_issues']/total_words*100:.2f} per 100 words
2286
- * Compare to age norms and assess severity
2287
 
2288
  B. Word Retrieval Issues:
2289
  - Circumlocutions: Count and analyze from transcript
@@ -2308,7 +2202,7 @@ with gr.Blocks(title="Speech Analysis", theme=gr.themes.Soft()) as demo:
2308
 
2309
  B. Grammar and Morphology:
2310
  - Error pattern analysis using verified counts
2311
- - Developmental appropriateness assessment
2312
  - Morphological complexity evaluation
2313
 
2314
  3. COMPLEX SENTENCE ANALYSIS (use verified counts)
@@ -2335,7 +2229,7 @@ with gr.Blocks(title="Speech Analysis", theme=gr.themes.Soft()) as demo:
2335
  - Tangential speech: Use verified count of {marker_counts.get('TANGENT', 0)}
2336
  - Coherence breaks: Use verified count of {marker_counts.get('COHERENCE_BREAK', 0)}
2337
  - Referential clarity: Use verified count of {marker_counts.get('PRONOUN_REF', 0)}
2338
- - Overall conversational competence assessment
2339
 
2340
  6. VOCABULARY AND SEMANTIC ANALYSIS
2341
  - Semantic errors: Use verified count of {marker_counts.get('SEMANTIC_ERROR', 0)}
@@ -2347,21 +2241,9 @@ with gr.Blocks(title="Speech Analysis", theme=gr.themes.Soft()) as demo:
2347
  - Morphological complexity assessment
2348
  - Derivational and inflectional morphology patterns
2349
  - Error analysis using verified counts
2350
- - Developmental appropriateness
2351
-
2352
- 8. COGNITIVE-LINGUISTIC FACTORS
2353
- - Working memory indicators from sentence complexity
2354
- - Processing speed markers from fluency patterns
2355
- - Executive function evidence from self-corrections
2356
- - Attention and cognitive load management
2357
 
2358
- 9. FLUENCY AND RHYTHM ANALYSIS
2359
- - Disfluency pattern analysis using verified counts
2360
- - Speech rhythm and flow assessment
2361
- - Natural vs. disrupted pause patterns
2362
- - Overall fluency profile
2363
-
2364
- 10. QUANTITATIVE METRICS (use ALL verified data)
2365
  - Total words: {total_words}
2366
  - Total sentences: {linguistic_metrics.get('total_sentences', 0)}
2367
  - Unique words: {linguistic_metrics.get('unique_words', 0)}
@@ -2369,25 +2251,7 @@ with gr.Blocks(title="Speech Analysis", theme=gr.themes.Soft()) as demo:
2369
  - MLU morphemes: {linguistic_metrics.get('mlu_morphemes', 0):.2f}
2370
  - All error rates and ratios from verified counts
2371
 
2372
- 11. CLINICAL IMPLICATIONS
2373
- A. Strengths (with supporting evidence):
2374
- - Identify primary strengths using verified data
2375
- - Provide specific examples from transcript
2376
-
2377
- B. Areas of Need (prioritized by severity):
2378
- - Primary concerns based on verified counts and rates
2379
- - Secondary areas for intervention
2380
-
2381
- C. Treatment Recommendations:
2382
- - Specific, measurable therapy goals
2383
- - Evidence-based intervention approaches
2384
- - Progress monitoring strategies
2385
-
2386
- 12. PROGNOSIS AND SUMMARY
2387
- - Overall communication profile synthesis
2388
- - Functional impact assessment
2389
- - Treatment planning and expected outcomes
2390
- - Follow-up recommendations
2391
 
2392
  CRITICAL: Complete ALL 13 sections using verified data and specific transcript examples.
2393
  """
 
118
  "4. FIGURATIVE LANGUAGE ANALYSIS",
119
  "5. PRAGMATIC LANGUAGE ASSESSMENT",
120
  "6. VOCABULARY AND SEMANTIC ANALYSIS",
121
+ "7. MORPHOLOGICAL AND PHONOLOGICAL ANALYSIS",
122
+ "8. QUANTITATIVE METRICS AND NLP FEATURES"
 
 
 
 
 
123
  ]
124
 
125
  combined_parts = []
 
200
 
201
  prompt = f"""
202
  You are a speech-language pathologist answering a specific question about a speech sample.
 
 
203
 
204
  TRANSCRIPT:
205
  {transcript_content}{notes_section}
 
210
  - Provide a focused, detailed answer to the specific question asked
211
  - Include specific examples from the transcript with exact quotes
212
  - Provide quantitative data when relevant (counts, percentages, rates)
213
+ - Provide objective data interpretation only
214
  - Keep the response focused on the question but thorough in analysis
215
  - If the question relates to multiple areas, address all relevant aspects
216
 
 
256
  - Note any fluency-enhancing contexts
257
  - Assess overall speech rhythm and flow
258
 
259
+ 4. OBJECTIVE SUMMARY:
260
+ - Provide data summary only
261
+ - List observed patterns
 
262
  """,
263
 
264
  "Grammar and Syntax": """
 
280
  - Count subject-verb agreement errors
281
  - Assess auxiliary verb usage
282
 
283
+ 4. OBJECTIVE SUMMARY:
284
+ - List primary grammatical patterns observed
285
+ - Provide data summary only
 
286
  """,
287
 
288
  "Vocabulary and Semantics": """
 
308
  - Identify compensatory strategies
309
  - Assess overall lexical access efficiency
310
 
311
+ 5. OBJECTIVE SUMMARY:
312
+ - List vocabulary patterns observed
313
+ - Provide data summary only
314
  """,
315
 
316
  "Pragmatics and Discourse": """
 
336
  - Assess contextual appropriateness
337
  - Evaluate social awareness in communication
338
 
339
+ 5. OBJECTIVE SUMMARY:
340
+ - List pragmatic patterns observed
341
+ - Provide data summary only
342
  """,
343
 
344
  "Sentence Complexity": """
 
361
 
362
  4. SYNTACTIC MATURITY:
363
  - Calculate Mean Length of Utterance (MLU)
364
+ - List syntactic patterns observed
 
365
 
366
+ 5. OBJECTIVE SUMMARY:
367
+ - Provide complexity data summary
368
+ - List observed patterns only
369
  """,
370
 
371
  "Word Finding and Retrieval": """
 
391
  - Assess impact of topic familiarity
392
  - Evaluate effect of linguistic complexity on retrieval
393
 
394
+ 5. OBJECTIVE SUMMARY:
395
+ - List word-finding patterns observed
396
+ - Provide data summary only
 
397
  """
398
  }
399
 
 
402
 
403
  prompt = f"""
404
  You are a speech-language pathologist conducting a targeted analysis of a specific area.
 
 
405
 
406
  TRANSCRIPT:
407
  {transcript_content}{notes_section}
 
413
  INSTRUCTIONS:
414
  - Provide specific examples with exact quotes from the transcript
415
  - Include quantitative data (counts, percentages, rates per 100 words)
416
+ - Provide objective data interpretation only
417
+ - Focus on measurable observations
418
  - Be thorough but focused on the specified area
419
 
420
  Conduct the targeted analysis:
 
474
 
475
  annotation_prompt = f"""
476
  You are a speech-language pathologist preparing a transcript for detailed analysis. Your task is to ANNOTATE the ENTIRE transcript with linguistic markers at a WORD-BY-WORD level.
 
 
477
 
478
  ORIGINAL TRANSCRIPT:
479
  {transcript_content}{notes_section}
 
590
  """
591
 
592
  analysis_prompt = f"""
593
+ You are a speech-language pathologist conducting a comprehensive analysis of an annotated speech sample. Provide objective data analysis without clinical interpretations.
594
 
 
 
595
  ANNOTATED TRANSCRIPT:
596
  {annotated_transcript}{notes_section}
597
 
598
+ INSTRUCTIONS: Complete ALL 8 sections below. Use simple formatting with NO BOLDING (no ** or asterisks), NO hashtags (###), and minimal markdown. Focus on objective data only. Count all markers precisely and provide specific examples. Write section headers as plain text followed by a colon. DO NOT include age/gender comparisons, clinical interpretations, severity assessments, or treatment recommendations.
599
 
600
  COMPREHENSIVE SPEECH SAMPLE ANALYSIS
601
 
 
620
  * Semantic revisions: "car- I mean bike"
621
  - Pauses ([PAUSE]): Count hesitation markers and silent pauses
622
  - Total disfluency rate: Calculate combined rate per 100 words
 
623
 
624
  B. Word Retrieval Issues (detailed analysis):
625
  - Circumlocutions ([CIRCUMLOCUTION]): Count and analyze strategies
 
691
  - Clauses per utterance ratio
692
  - Subordination index
693
  - Coordination index
 
694
 
695
  4. FIGURATIVE LANGUAGE ANALYSIS
696
 
 
699
  * Metaphors: "Time is money"
700
  * Similes: "Fast as lightning"
701
  * Idioms: "Raining cats and dogs"
702
+ - Appropriateness assessment: Context only
703
  - Comprehension vs. production abilities
704
  - Abstract language development indicators
705
 
 
735
  - Vocabulary breadth: Range of semantic categories
736
  - Vocabulary depth: Precision and nuance within categories
737
  - Academic vs. conversational vocabulary ratio
738
+ - Vocabulary development patterns observed
739
 
740
  7. NLP-DERIVED LINGUISTIC FEATURES (use bullet lists, NO tables)
741
 
 
750
  * Provide exact MTLD score and interpretation
751
  - Hypergeometric Distribution D (HDD): Probability-based diversity measure
752
  * Controls for text length effects
753
+ * Provide HDD score
754
 
755
  B. Word Frequency Analysis (as bullet list, not table):
756
  - Most frequent words used: List top 10 as "word (count)" format
757
  - High-frequency vs. low-frequency word distribution
758
  - Function words vs. content words ratio
759
+ - Repetitive word patterns observed
760
 
761
  C. Linguistic Complexity Indicators (bullet format):
762
  - Average word length in syllables
 
770
  - Derivational morphology: Prefixes and suffixes
771
  - Inflectional morphology: Tense, number, case markers
772
  - Morphological awareness indicators
773
+ - Error patterns observed
774
 
775
  B. Phonological Considerations:
776
  - Sound pattern analysis (if evident in transcript)
 
792
  C. Executive Function Evidence:
793
  - Self-monitoring and error correction
794
  - Planning and organization in discourse
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
795
  CRITICAL REQUIREMENTS:
796
+ 1. Complete ALL 8 sections - do not stop early
797
  2. Provide exact counts for all markers with specific examples
798
  3. Calculate all percentages and rates with formulas shown
799
  4. Include direct quotes from transcript for examples
800
+ 5. Focus on objective data only - NO clinical interpretations or age/gender comparisons
801
+ 6. NO treatment recommendations or clinical implications
802
  7. If response is incomplete, end with <CONTINUE>
803
  8. FORMATTING: Use NO asterisks (**), NO hashtags (###), NO bolding - plain text only
804
  """
 
1292
  "4. FIGURATIVE LANGUAGE ANALYSIS",
1293
  "5. PRAGMATIC LANGUAGE ASSESSMENT",
1294
  "6. VOCABULARY AND SEMANTIC ANALYSIS",
1295
+ "7. MORPHOLOGICAL AND PHONOLOGICAL ANALYSIS",
1296
+ "8. QUANTITATIVE METRICS AND NLP FEATURES"
 
 
 
 
 
1297
  ]
1298
 
1299
  # Safety limits to prevent infinite loops
 
1545
  - Count [REPETITION] markers: Categorize by type (word, phrase, sound)
1546
  - Count [REVISION] markers: Analyze self-correction patterns
1547
  - Count [PAUSE] markers: Assess hesitation frequency
1548
+ - Calculate total disfluency rate
1549
 
1550
  B. Word Retrieval Issues:
1551
  - Count [CIRCUMLOCUTION] markers: List each roundabout description
 
1588
 
1589
  C. Sentence Structure Analysis:
1590
  - Use calculated MLU: {linguistic_metrics.get('mlu_words', 0)} words, {linguistic_metrics.get('mlu_morphemes', 0)} morphemes
1591
+ - Calculate complexity ratios
1592
 
1593
  4. FIGURATIVE LANGUAGE ANALYSIS (with exact counts):
1594
 
 
1668
  - Grammar error rate: Calculate from marker counts
1669
  - Vocabulary sophistication ratio: {marker_analysis.get('category_totals', {}).get('vocab_sophistication_ratio', 0):.3f}
1670
 
 
 
 
 
 
 
 
 
 
 
 
 
1671
  CRITICAL REQUIREMENTS:
1672
  - Use the provided calculated metrics in your analysis
1673
  - Provide EXACT counts for every marker type
1674
  - Calculate precise percentages and show your work
1675
  - Give specific examples from the transcript
1676
  - If annotation is incomplete, supplement with analysis of the original transcript
1677
+ - Complete ALL 8 sections - use <CONTINUE> if needed
1678
+ - Focus on objective data only - NO clinical interpretations
1679
  """
1680
 
1681
  return call_claude_api_with_continuation(analysis_prompt)
 
2051
 
2052
  Provide a comprehensive objective data interpretation organized into these sections:
2053
 
2054
+ 1. LEXICAL DIVERSITY DATA:
2055
+ - Report the advanced lexical diversity measures (MTLD, HDD, MATTR, etc.)
2056
+ - Provide objective data interpretation only
 
2057
 
2058
+ 2. FLUENCY PATTERN DATA:
2059
+ - Report fluency marker counts and rates
2060
+ - Provide objective data summary only
 
2061
 
2062
+ 3. GRAMMATICAL PATTERN DATA:
2063
+ - Report grammar error patterns from verified counts
2064
+ - Provide objective data summary only
 
2065
 
2066
  4. VOCABULARY AND SEMANTIC ANALYSIS:
2067
  - Interpretation of vocabulary sophistication measures
 
2078
  - Strengths and areas of need
2079
  - Functional communication impact
2080
 
2081
+ Focus on OBJECTIVE DATA INTERPRETATION only, not clinical significance.
 
 
 
 
 
 
2082
  All measurements are already verified and accurate.
2083
+ Cite specific examples from the transcript to support your observations.
2084
  """
2085
 
2086
  ai_interpretation = call_claude_api(verified_prompt)
 
2168
  - Filler words: Use verified count of {marker_counts.get('FILLER', 0)} fillers
2169
  * Calculate rate per 100 words: {marker_counts.get('FILLER', 0)/total_words*100:.2f}%
2170
  * Identify types and provide examples from transcript
2171
+ * Provide objective count summary
2172
  - False starts: Use verified count of {marker_counts.get('FALSE_START', 0)}
2173
  * Provide specific examples from transcript
2174
  * Analyze patterns and self-correction abilities
2175
  - Repetitions: Use verified count of {marker_counts.get('REPETITION', 0)}
2176
  * Categorize types (word, phrase, sound level)
2177
+ * Provide examples and count summary
2178
  - Total disfluency assessment: Use verified total of {category_totals['fluency_issues']}
2179
  * Rate: {category_totals['fluency_issues']/total_words*100:.2f} per 100 words
2180
+ * Provide objective rate calculation
2181
 
2182
  B. Word Retrieval Issues:
2183
  - Circumlocutions: Count and analyze from transcript
 
2202
 
2203
  B. Grammar and Morphology:
2204
  - Error pattern analysis using verified counts
2205
+ - Pattern analysis only
2206
  - Morphological complexity evaluation
2207
 
2208
  3. COMPLEX SENTENCE ANALYSIS (use verified counts)
 
2229
  - Tangential speech: Use verified count of {marker_counts.get('TANGENT', 0)}
2230
  - Coherence breaks: Use verified count of {marker_counts.get('COHERENCE_BREAK', 0)}
2231
  - Referential clarity: Use verified count of {marker_counts.get('PRONOUN_REF', 0)}
2232
+ - Overall conversational patterns observed
2233
 
2234
  6. VOCABULARY AND SEMANTIC ANALYSIS
2235
  - Semantic errors: Use verified count of {marker_counts.get('SEMANTIC_ERROR', 0)}
 
2241
  - Morphological complexity assessment
2242
  - Derivational and inflectional morphology patterns
2243
  - Error analysis using verified counts
2244
+ - Pattern analysis only
 
 
 
 
 
 
2245
 
2246
+ 8. QUANTITATIVE METRICS AND NLP FEATURES (use ALL verified data)
 
 
 
 
 
 
2247
  - Total words: {total_words}
2248
  - Total sentences: {linguistic_metrics.get('total_sentences', 0)}
2249
  - Unique words: {linguistic_metrics.get('unique_words', 0)}
 
2251
  - MLU morphemes: {linguistic_metrics.get('mlu_morphemes', 0):.2f}
2252
  - All error rates and ratios from verified counts
2253
 
2254
+
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
2255
 
2256
  CRITICAL: Complete ALL 8 sections using verified data and specific transcript examples.
2257
  """