# app.py — TranscriptorAI: Gradio app for transcript analysis (HuggingFace Space)
| import gradio as gr | |
| import os | |
| from datetime import datetime | |
| from typing import List, Dict, Tuple | |
| from extractors import extract_docx, extract_pdf, validate_extraction | |
| from tagging import tag_speakers_advanced | |
| from chunking import chunk_text_semantic | |
| from llm import query_llm, extract_structured_data | |
| from reporting import generate_enhanced_csv, generate_enhanced_pdf | |
| from dashboard import generate_comprehensive_dashboard | |
| from validation import validate_transcript_quality, check_data_completeness | |
| from audio_transcriber import transcribe_with_diarization_streaming | |
# HuggingFace Spaces Configuration
# These environment variables are read by the llm module to pick the backend
# and keep requests within the Space's free-tier time/token budget.
# (The duplicate `import os` that used to live here was removed — `os` is
# already imported at the top of the file.)
os.environ["LLM_BACKEND"] = "hf_api"
os.environ["LLM_TIMEOUT"] = "25"
os.environ["MAX_TOKENS_PER_REQUEST"] = "100"
print("π Running on HuggingFace Spaces - Optimized Configuration Loaded")
def preprocess_audio(audio_files, num_speakers):
    """Transcribe uploaded audio files into speaker-tagged transcript files.

    Args:
        audio_files: list of Gradio file objects (or plain path strings).
        num_speakers: expected number of speakers for diarization.

    Returns:
        (transcript_paths, status): list of produced transcript paths (or None
        if nothing was transcribed) and a per-file status log string.
    """
    if not audio_files:
        return None, "No audio files provided"
    transcript_paths = []
    status = ""
    for audio in audio_files:
        # Resolve the path OUTSIDE the try block so `audio_path` is always
        # bound when the except handler formats its message.
        # Gradio may hand us tempfile wrappers (with .name) or plain paths.
        audio_path = audio.name if hasattr(audio, 'name') else str(audio)
        try:
            # BUGFIX: the module imports `transcribe_with_diarization_streaming`
            # (see top-of-file imports); the old call to `transcribe_with_diarization`
            # was an unresolved name and raised NameError at runtime.
            transcript_path = transcribe_with_diarization_streaming(audio_path, num_speakers)
            transcript_paths.append(transcript_path)
            status += f"β {os.path.basename(audio_path)} β {transcript_path}\n"
        except Exception as e:
            status += f"β {os.path.basename(audio_path)}: {str(e)}\n"
    # Return list of paths for the Gradio File component (None if all failed).
    return transcript_paths if transcript_paths else None, status
def analyze(files, file_type, user_comments, role_hint, debug_mode, interviewee_type, progress=gr.Progress()):
    """
    Enhanced analysis pipeline with robust error handling and validation.

    Per uploaded file: extract text -> validate extraction -> tag speakers ->
    semantic chunking -> per-chunk LLM analysis with structured extraction.
    Then: cross-transcript summary (with quality validation, one retry, and an
    emergency fallback), consensus verification, CSV/PDF reports, dashboard.

    Args:
        files: uploaded file objects (each exposes a `.name` path attribute).
        file_type: "DOCX" or "PDF" — selects the extractor.
        user_comments: free-text analysis instructions folded into the prompt.
        role_hint: optional speaker-role mapping forwarded to the tagger.
        debug_mode: mirrored into the DEBUG_MODE env var for downstream modules.
        interviewee_type: "HCP", "Patient", or "Other" — drives focus/fields.
        progress: Gradio progress tracker (the gr.Progress() default in the
            signature is the Gradio injection convention, not a shared mutable).

    Returns:
        (markdown_report, csv_path, pdf_path, dashboard) on success, or
        (error_message, None, None, None) on fatal failure.
    """
    # Propagate the UI debug toggle to downstream modules via the environment.
    os.environ["DEBUG_MODE"] = str(debug_mode)
    if not files:
        return "Error: No files uploaded", None, None, None
    all_results = []        # per-file result dicts (successes AND failures)
    csv_rows = []           # flat rows consumed by generate_enhanced_csv
    # NOTE(review): processing_errors accumulates plain strings (low-quality
    # warnings below) AND dicts (failure records in the except handler) —
    # confirm generate_enhanced_pdf and the report section handle both shapes.
    processing_errors = []
    progress(0, desc="Initializing...")
    print(f"[Start] Processing {len(files)} file(s) as {file_type}")
    # Enhanced interviewee context: per-type analysis focus and the structured
    # fields the LLM is asked to extract. Unknown types get an empty dict.
    interviewee_context = {
        "HCP": {
            "focus": "clinical reasoning, peer communication, medical expertise, prescribing patterns",
            "extract": ["diagnoses", "treatment_rationale", "clinical_decisions", "prescriptions", "guidelines_mentioned"]
        },
        "Patient": {
            "focus": "symptoms, concerns, emotional state, treatment understanding, adherence",
            "extract": ["symptoms", "concerns", "treatment_response", "quality_of_life", "side_effects"]
        },
        "Other": {
            "focus": "context-dependent insights, relevant observations",
            "extract": ["key_insights", "context", "recommendations"]
        }
    }.get(interviewee_type, {})
    # Build enhanced user context passed along with every LLM call.
    user_context = f"""
Interviewee Type: {interviewee_type}
Analysis Focus: {interviewee_context.get('focus', 'general insights')}
Key Data Points to Extract: {', '.join(interviewee_context.get('extract', []))}
Additional Instructions:
{user_comments}
""".strip()
    total_steps = len(files) * 4 + 2  # extraction, validation, tagging, chunking per file + summary + report
    current_step = 0
    for i, file in enumerate(files):
        file_name = os.path.basename(file.name)
        try:
            # Step 1: Extract text (extractor chosen by the file_type radio).
            progress((current_step / total_steps), desc=f"Extracting {file_name}...")
            print(f"[File {i+1}/{len(files)}] Extracting: {file_name}")
            raw_text = extract_docx(file) if file_type == "DOCX" else extract_pdf(file)
            current_step += 1
            # Step 2: Validate extraction — bail out of this file on failure.
            progress((current_step / total_steps), desc=f"Validating {file_name}...")
            is_valid, validation_msg = validate_extraction(raw_text, file_name)
            if not is_valid:
                raise ValueError(f"Extraction validation failed: {validation_msg}")
            print(f"[File {i+1}] Extracted {len(raw_text)} characters - Valid: {validation_msg}")
            current_step += 1
            # Step 3: Tag speakers with advanced logic (uses optional role hint).
            progress((current_step / total_steps), desc=f"Analyzing speakers in {file_name}...")
            tagged_text = tag_speakers_advanced(raw_text, role_hint, interviewee_type)
            print(f"[File {i+1}] Tagged {len(tagged_text)} characters")
            current_step += 1
            # Step 4: Semantic chunking so each chunk fits an LLM request.
            progress((current_step / total_steps), desc=f"Processing {file_name}...")
            chunks = chunk_text_semantic(tagged_text, interviewee_type)
            print(f"[File {i+1}] Created {len(chunks)} semantic chunk(s)")
            current_step += 1
            # Step 5: LLM analysis with structured extraction, chunk by chunk.
            transcript_result = []
            structured_data = {}
            for j, chunk in enumerate(chunks):
                # Fractional progress within the per-file analysis step.
                chunk_progress = (current_step + (j / len(chunks))) / total_steps
                progress(chunk_progress, desc=f"Analyzing {file_name} ({j+1}/{len(chunks)})...")
                result, chunk_data = query_llm(
                    chunk,
                    user_context,
                    interviewee_type,
                    extract_structured=True
                )
                transcript_result.append(result)
                # Merge structured data: every key accumulates a flat list
                # across chunks (lists are extended, scalars appended).
                for key, value in chunk_data.items():
                    if key not in structured_data:
                        structured_data[key] = []
                    if isinstance(value, list):
                        structured_data[key].extend(value)
                    else:
                        structured_data[key].append(value)
            current_step += 1
            # Combine and validate results for the whole file.
            full_text = "\n\n".join(transcript_result)
            # Quality check on the combined analysis text + structured data.
            quality_score, quality_issues = validate_transcript_quality(
                full_text,
                structured_data,
                interviewee_type
            )
            if quality_score < 0.3:
                # Low quality is logged as a warning string, not a failure.
                print(f"[Warning] Low quality score ({quality_score:.2f}) for {file_name}: {quality_issues}")
                processing_errors.append(f"{file_name}: Low quality - {quality_issues}")
            all_results.append({
                "transcript_id": f"Transcript {i+1}",
                "file_name": file_name,
                "full_text": full_text,
                "structured_data": structured_data,
                "quality_score": quality_score,
                "word_count": len(raw_text.split())
            })
            # Enhanced CSV row with structured data.
            csv_row = {
                "Transcript ID": f"Transcript {i+1}",
                "File Name": file_name,
                "Quality Score": f"{quality_score:.2f}",
                "Word Count": len(raw_text.split()),
            }
            # Add interviewee-specific columns matching the `extract` lists above.
            if interviewee_type == "HCP":
                csv_row.update({
                    "Diagnoses": "; ".join(structured_data.get("diagnoses", [])),
                    "Prescriptions": "; ".join(structured_data.get("prescriptions", [])),
                    "Treatment Strategies": "; ".join(structured_data.get("treatment_rationale", [])),
                    "Guidelines Mentioned": "; ".join(structured_data.get("guidelines_mentioned", []))
                })
            elif interviewee_type == "Patient":
                csv_row.update({
                    "Primary Symptoms": "; ".join(structured_data.get("symptoms", [])),
                    "Main Concerns": "; ".join(structured_data.get("concerns", [])),
                    "Treatment Response": "; ".join(structured_data.get("treatment_response", [])),
                    "Side Effects": "; ".join(structured_data.get("side_effects", []))
                })
            else:
                csv_row.update({
                    "Key Insights": "; ".join(structured_data.get("key_insights", [])),
                    "Recommendations": "; ".join(structured_data.get("recommendations", []))
                })
            csv_rows.append(csv_row)
            print(f"[File {i+1}] β Processing complete")
        except Exception as e:
            # Enhanced error tracking with type and traceback; one failed file
            # does not abort the whole batch.
            import traceback
            error_type = type(e).__name__
            error_details = str(e)
            # NOTE(review): error_traceback is computed but never used below.
            error_traceback = traceback.format_exc()
            error_msg = f"[{error_type}] {file_name}: {error_details}"
            print(error_msg)
            # Store comprehensive error information (dict shape — see NOTE above).
            processing_errors.append({
                "transcript_id": f"Transcript {i+1}",
                "file_name": file_name,
                "error_type": error_type,
                "error_message": error_details[:200],  # Truncate long messages
                "timestamp": datetime.now().isoformat()
            })
            # Record the failure as a zero-quality result so downstream
            # filtering (quality_score > 0) excludes it from the summary.
            all_results.append({
                "transcript_id": f"Transcript {i+1}",
                "file_name": file_name,
                "full_text": error_msg,
                "structured_data": {},
                "quality_score": 0.0,
                "word_count": 0,
                "processing_status": "FAILED",
                "error_type": error_type
            })
            # Add to CSV with error metadata.
            csv_rows.append({
                "Transcript ID": f"Transcript {i+1}",
                "File Name": file_name,
                "Quality Score": 0.0,
                "Word Count": 0,
                "Processing Status": "FAILED",
                "Error Type": error_type,
                "Error Message": error_details[:100]
            })
    # Generate cross-transcript summary and the final reports.
    try:
        progress(0.9, desc="Generating summary and reports...")
        print("[Summary] Analyzing trends across transcripts")
        # Combine successful results (failures were recorded with score 0.0).
        valid_results = [r for r in all_results if r["quality_score"] > 0]
        if not valid_results:
            return "Error: No transcripts were successfully processed", None, None, None
        # Build comprehensive summary prompt: header, then each transcript's
        # analysis (capped at 2000 chars each), then the output instructions.
        summary_prompt = f"""
CROSS-INTERVIEW SYNTHESIS TASK
SAMPLE: {len(valid_results)} {interviewee_type} transcripts
FOCUS AREAS: {interviewee_context.get('focus', 'general patterns')}
COMPLETE TRANSCRIPT DATA:
"""
        for idx, result in enumerate(valid_results, 1):
            summary_prompt += f"\n{'='*60}\nTRANSCRIPT {idx}/{len(valid_results)}: {result['file_name']}\n{'='*60}\n"
            summary_prompt += f"{result['full_text'][:2000]}\n"
        summary_prompt += f"""
ANALYSIS REQUIREMENTS:
1. QUANTIFY EVERYTHING:
- Count participants: "X out of {len(valid_results)} participants mentioned..."
- Never use vague terms (many/most/some)
- Calculate percentages where relevant
2. IDENTIFY PATTERNS BY CONSENSUS LEVEL:
- STRONG CONSENSUS (80%+ = {int(len(valid_results)*0.8)}+ transcripts agree)
- MAJORITY VIEW (60-79% = {int(len(valid_results)*0.6)}-{int(len(valid_results)*0.79)} transcripts)
- SPLIT PERSPECTIVES (40-59% = mixed views)
- MINORITY/OUTLIER (<40% but notable)
3. CROSS-VALIDATE:
- Check for contradictions between transcripts
- Note where perspectives diverge and why
- Flag any quality issues in individual transcripts
4. CITE EVIDENCE:
- Reference specific transcript numbers
- Brief supporting details
- Distinguish verified facts from interpretation
OUTPUT FORMAT:
Write 2-3 sentence executive overview, then structure as:
**STRONG CONSENSUS FINDINGS:**
- [Finding with count and evidence]
**MAJORITY FINDINGS:**
- [Finding with count]
**DIVERGENT PERSPECTIVES:**
- [Where views split and context]
**NOTABLE OUTLIERS:**
- [Unique but important points]
**DATA QUALITY NOTES:**
- [Any gaps or transcript issues]
Be specific. Use numbers. Cite transcript IDs. Flag weak evidence.
"""
        # Use robust LLM with aggressive timeout protection.
        print("[Summary] Generating cross-transcript summary...")
        print("[Summary] Note: This may take 30-60 seconds for large datasets")
        try:
            from llm_robust import query_llm_with_timeout
            summary, summary_data = query_llm_with_timeout(
                summary_prompt,
                user_context,
                interviewee_type,
                extract_structured=False,
                is_summary=True,
                max_timeout=60  # 60 second hard timeout
            )
        except Exception as e:
            # Ultimate fallback: a canned summary so the pipeline still
            # produces reports instead of failing outright.
            print(f"[Summary] Critical error: {e}")
            print("[Summary] Using emergency fallback...")
            from llm_robust import generate_emergency_summary
            summary, summary_data = generate_emergency_summary(interviewee_type)
        # Validate summary quality and retry once if below threshold.
        from validation import validate_summary_quality
        summary_score, summary_issues = validate_summary_quality(
            summary,
            len(valid_results)
        )
        if summary_score < 0.7:  # Quality threshold
            print(f"[Warning] Summary quality issues (score: {summary_score:.2f}): {summary_issues}")
            print("[Summary] Retrying with stricter validation...")
            # Retry with enhanced prompt emphasizing the validation failures.
            retry_prompt = summary_prompt + f"""
CRITICAL: Previous attempt failed validation with these issues:
{chr(10).join('- ' + issue for issue in summary_issues)}
MANDATORY CORRECTIONS:
- Use ONLY specific numbers (e.g., "8 out of {len(valid_results)}" not "most")
- Include percentages in parentheses
- Cite transcript numbers for every claim
- Minimum length: 500 words
- No absolute terms (all/everyone/never/always) without 100% evidence
"""
            try:
                # NOTE(review): relies on query_llm_with_timeout having been
                # imported above; if that import itself failed, the NameError
                # here is caught and routed to the emergency fallback.
                summary, summary_data = query_llm_with_timeout(
                    retry_prompt,
                    user_context,
                    interviewee_type,
                    extract_structured=False,
                    is_summary=True,
                    max_timeout=60  # 60 second hard timeout for retry
                )
            except Exception as e:
                print(f"[Summary] Retry also failed: {e}")
                print("[Summary] Using emergency fallback for retry...")
                summary, summary_data = generate_emergency_summary(interviewee_type)
            # Re-validate the retried summary.
            summary_score, summary_issues = validate_summary_quality(summary, len(valid_results))
            if summary_score < 0.7:
                # Still failing: prepend a visible quality warning header.
                warning_header = f"""[QUALITY WARNING - Score: {summary_score:.2f}]
Validation issues detected: {'; '.join(summary_issues)}
Please review findings carefully and verify against source data.
{'='*60}
"""
                summary = warning_header + summary
                print(f"[Warning] Summary still has issues after retry (score: {summary_score:.2f})")
            else:
                print(f"[Summary] β Validation passed after retry (score: {summary_score:.2f})")
        else:
            print(f"[Summary] β Validation passed (score: {summary_score:.2f})")
        # Verify consensus claims against actual data; append any warnings.
        from validation import verify_consensus_claims
        consensus_warnings = verify_consensus_claims(summary, valid_results)
        if consensus_warnings:
            print(f"[Warning] Consensus verification issues: {len(consensus_warnings)} found")
            consensus_note = "\n\n[CONSENSUS VERIFICATION NOTES]:\n" + "\n".join(f"- {w}" for w in consensus_warnings) + "\n\n"
            summary = summary + consensus_note
        else:
            print("[Summary] β Consensus claims verified")
        # Generate enhanced reports: CSV, PDF, and dashboard plot.
        csv_path = generate_enhanced_csv(csv_rows, interviewee_type)
        print(f"[CSV] β Saved to {csv_path}")
        pdf_path = generate_enhanced_pdf(
            summary,
            all_results,
            interviewee_type,
            processing_errors
        )
        print(f"[PDF] β Saved to {pdf_path}")
        dashboard = generate_comprehensive_dashboard(csv_rows, interviewee_type)
        print("[Dashboard] β Generated")
        # Compile final markdown output shown in the UI textbox.
        # NOTE(review): "Failed" counts every processing_errors entry, which
        # also includes low-quality warnings — not only hard failures.
        output_text = f"""# Analysis Complete
## Summary of Findings
{summary}
## Processing Statistics
- Total Files: {len(files)}
- Successfully Processed: {len(valid_results)}
- Failed: {len(processing_errors)}
- Average Quality Score: {sum(r['quality_score'] for r in valid_results) / len(valid_results):.2f}
"""
        if processing_errors:
            # Dict entries render as their repr here (see NOTE at top).
            output_text += f"\n## Processing Errors\n" + "\n".join(f"- {err}" for err in processing_errors)
        output_text += "\n\n---\n\n## Individual Transcript Results\n\n"
        for result in all_results:
            output_text += f"### {result['transcript_id']} - {result['file_name']}\n"
            output_text += f"Quality Score: {result['quality_score']:.2f} | Words: {result['word_count']}\n\n"
            output_text += result['full_text'] + "\n\n---\n\n"
        progress(1.0, desc="Complete!")
        return output_text, csv_path, pdf_path, dashboard
    except Exception as e:
        # Fatal path: summary/report generation itself failed.
        error_msg = f"[Fatal Error] Summary or report generation failed: {str(e)}"
        print(error_msg)
        import traceback
        traceback.print_exc()
        return error_msg, None, None, None
def generate_narrative_report_ui(csv_file, summary_text, interviewee_type, report_style):
    """Gradio wrapper that turns analysis outputs into narrative reports.

    Args:
        csv_file: the CSV produced by `analyze` (Gradio file object or path).
        summary_text: optional pasted summary text; written to a temp file
            and passed to the generator.
        interviewee_type: "HCP", "Patient", or "Other".
        report_style: "executive", "detailed", or "presentation".

    Returns:
        (status_message, pdf_path, word_path, html_path); the paths are None
        on error.
    """
    # Fail fast before importing the generator so a missing CSV produces the
    # specific "no CSV" message rather than an unrelated import error.
    if csv_file is None:
        return "Error: No CSV file provided. Please run analysis first.", None, None, None
    summary_path = None
    try:
        from narrative_report_generator import generate_narrative_report
        import tempfile
        # Persist the pasted summary to a temp file if one was provided.
        if summary_text and summary_text.strip():
            with tempfile.NamedTemporaryFile(mode='w', delete=False, suffix='.txt') as f:
                f.write(summary_text)
                summary_path = f.name
        # Determine LLM backend from the same env switch the rest of the app uses.
        llm_backend = "lmstudio" if os.getenv("USE_LMSTUDIO", "False").lower() == "true" else "hf_api"
        # Generate narrative report in all three formats.
        pdf_path, word_path, html_path = generate_narrative_report(
            csv_path=csv_file.name if hasattr(csv_file, 'name') else csv_file,
            summary_path=summary_path,
            interviewee_type=interviewee_type,
            report_style=report_style,
            llm_backend=llm_backend
        )
        return (
            f"β Narrative reports generated successfully!\n\nPDF: {pdf_path}\nWord: {word_path}\nHTML: {html_path}",
            pdf_path,
            word_path,
            html_path
        )
    except Exception as e:
        import traceback
        error_detail = traceback.format_exc()
        return f"Error generating narrative report: {str(e)}\n\n{error_detail}", None, None, None
    finally:
        # BUGFIX: cleanup now runs in `finally`, so the temp summary file no
        # longer leaks when generate_narrative_report raises.
        if summary_path and os.path.exists(summary_path):
            os.remove(summary_path)
# UI layout: three workflow tabs (audio preprocessing -> transcript analysis
# -> narrative report) plus a help tab. Component variables defined here are
# wired to the handler functions above via .click().
with gr.Blocks(theme=gr.themes.Soft()) as demo:
    gr.Markdown("""
    # π― TranscriptorAI - Enterprise Transcript Analyzer
    Upload multiple transcripts and generate comprehensive, structured insights with advanced AI analysis.
    """)
    with gr.Tabs():
        # --- Tab 1: audio -> transcript preprocessing -----------------------
        with gr.TabItem("π€ Audio Preprocessing"):
            gr.Markdown("""
            Upload audio interviews to auto-transcribe with speaker identification.
            Outputs DOCX files ready for analysis.
            """)
            with gr.Row():
                audio_input = gr.File(
                    label="Upload Audio Files",
                    file_types=[".mp3", ".wav", ".m4a", ".flac"],
                    file_count="multiple"
                )
                num_speakers_input = gr.Slider(
                    minimum=1,
                    maximum=5,
                    value=2,
                    step=1,
                    label="Number of Speakers"
                )
            transcribe_btn = gr.Button("ποΈ Transcribe Audio", variant="primary")
            transcribe_status = gr.Textbox(label="Status", lines=10)
            transcript_files = gr.File(label="Download Transcripts", file_count="multiple")
            # Wire the transcribe button to the audio preprocessing handler.
            transcribe_btn.click(
                fn=preprocess_audio,
                inputs=[audio_input, num_speakers_input],
                outputs=[transcript_files, transcribe_status]
            )
            gr.Markdown("""
            **Next:** Download transcripts, then go to "Transcript Analysis" tab to analyze them.
            """)
        # --- Tab 2: main transcript analysis pipeline -----------------------
        with gr.TabItem("π Transcript Analysis"):
            with gr.Row():
                with gr.Column(scale=1):
                    files = gr.File(
                        label="π Upload Transcripts",
                        file_types=[".docx", ".pdf"],
                        file_count="multiple"
                    )
                    file_type = gr.Radio(
                        ["DOCX", "PDF"],
                        label="File Type",
                        value="DOCX"
                    )
                    interviewee_type = gr.Radio(
                        ["HCP", "Patient", "Other"],
                        label="Interviewee Type",
                        value="Patient",
                        info="Select the type of person being interviewed"
                    )
                with gr.Column(scale=1):
                    user_comments = gr.Textbox(
                        label="Analysis Instructions",
                        lines=6,
                        placeholder="Enter specific analysis goals, questions to answer, or context...",
                        info="Provide guidance for the AI analyzer"
                    )
                    role_hint = gr.Textbox(
                        label="Speaker Role Mapping (Optional)",
                        placeholder="e.g., Speaker 1 = Interviewer, Speaker 2 = Doctor",
                        info="Help identify speakers if needed"
                    )
            with gr.Row():
                debug_mode = gr.Checkbox(label="π Enable Debug Mode", value=False)
                analyze_btn = gr.Button("π Analyze Transcripts", variant="primary", scale=2)
            with gr.Row():
                output_text = gr.Textbox(label="π Analysis Report", lines=40)
            with gr.Row():
                csv_output = gr.File(label="π₯ Download CSV")
                pdf_output = gr.File(label="π₯ Download PDF")
            with gr.Row():
                dashboard_output = gr.Plot(label="π Dashboard Visualization")
            # Wire the analyze button to the main pipeline.
            analyze_btn.click(
                fn=analyze,
                inputs=[files, file_type, user_comments, role_hint, debug_mode, interviewee_type],
                outputs=[output_text, csv_output, pdf_output, dashboard_output]
            )
        # --- Tab 3: narrative report generation from analysis outputs -------
        with gr.TabItem("π Narrative Report"):
            gr.Markdown("""
            ## Generate Storytelling Report
            Transform your analysis into a narrative report with:
            - Executive summary with key insights
            - Data-driven storytelling
            - Professional formatting (PDF, Word, HTML)
            - Actionable recommendations
            **Instructions:** First run the analysis in the previous tab, then use the outputs here to generate a narrative report.
            """)
            with gr.Row():
                with gr.Column():
                    narrative_csv = gr.File(
                        label="CSV Output from Analysis",
                        file_types=[".csv"]
                    )
                    narrative_summary = gr.Textbox(
                        label="Copy/Paste Summary Text from Analysis (Optional)",
                        lines=10,
                        placeholder="Paste the executive summary text here..."
                    )
                with gr.Column():
                    narrative_interviewee_type = gr.Radio(
                        ["HCP", "Patient", "Other"],
                        label="Interviewee Type",
                        value="Patient"
                    )
                    narrative_report_style = gr.Radio(
                        ["executive", "detailed", "presentation"],
                        label="Report Style",
                        value="executive",
                        info="Executive = concise C-level report, Detailed = thorough analysis, Presentation = slide-ready"
                    )
            generate_narrative_btn = gr.Button("π Generate Narrative Report", variant="primary")
            narrative_status = gr.Textbox(label="Status", lines=5)
            with gr.Row():
                narrative_pdf_output = gr.File(label="π₯ Download PDF Report")
                narrative_word_output = gr.File(label="π₯ Download Word Report")
                narrative_html_output = gr.File(label="π₯ Download HTML Report")
            # Wire the narrative report button to its wrapper handler.
            generate_narrative_btn.click(
                fn=generate_narrative_report_ui,
                inputs=[narrative_csv, narrative_summary, narrative_interviewee_type, narrative_report_style],
                outputs=[narrative_status, narrative_pdf_output, narrative_word_output, narrative_html_output]
            )
        # --- Tab 4: static help text ----------------------------------------
        with gr.TabItem("β Help"):
            gr.Markdown("""
            ### Quick Start Guide
            **Step 1: Analyze Transcripts**
            1. Upload your DOCX or PDF files
            2. Select interviewee type (HCP, Patient, or Other)
            3. Add analysis instructions
            4. Click "Analyze Transcripts"
            5. Download CSV, PDF, and view dashboard
            **Step 2: Generate Narrative Report (Optional)**
            1. Go to "Narrative Report" tab
            2. Upload the CSV from Step 1
            3. Optionally paste the summary text
            4. Select report style
            5. Click "Generate Narrative Report"
            6. Download PDF, Word, or HTML versions
            ### Tips
            - **CSV Upload**: Download the CSV from analysis, then upload it to narrative report generator
            - **Summary Text**: Copy from the "Analysis Report" textbox and paste into narrative generator
            - **Report Styles**:
            - **Executive**: Best for C-level, investors, decision-makers
            - **Detailed**: Best for researchers, comprehensive analysis
            - **Presentation**: Best for slides, briefings, quick overviews
            ### LLM Configuration
            - Set `USE_LMSTUDIO=True` to use your local LM Studio
            - Set `HUGGINGFACE_TOKEN` to use HF API for faster processing
            - Default: Uses local model (slower but free)
            ### Support
            For issues, check the console output or enable debug mode.
            """)
    # Footer shown under all tabs.
    gr.Markdown("""
    ---
    **TranscriptorAI** | Enterprise-grade transcript analysis with narrative reporting
    """)
if __name__ == "__main__":
    # Queue limits concurrency so the Space's constrained CPU isn't
    # oversubscribed by simultaneous analyses.
    # NOTE(review): `concurrency_count` is a Gradio 3.x queue() argument;
    # Gradio 4+ renamed it — confirm the pinned gradio version before upgrading.
    demo.queue(
        concurrency_count=1,
        max_size=10,
        api_open=False
    ).launch(
        server_name="0.0.0.0",  # bind all interfaces (required inside the Spaces container)
        server_port=7860,       # the port HuggingFace Spaces routes traffic to
        show_error=True
    )