"""
Strike Capital AI Diligence Automation - Gradio Application
V0 Document Generation from Harmonic PDFs.
"""

import html
import os
import re
import tempfile
import traceback

import gradio as gr
import markdown2
from fpdf import FPDF

from config import validate_config
from pdf_extractor import extract_from_pdf
from pinecone_store import store_extracted_data, store_v0_document, store_sentence_chunks
from v0_generator import generate_v0_document

# Validate configuration on startup.
# A configuration error is reported on stdout but is not fatal: the module
# keeps loading so the UI can still come up.
try:
    validate_config()
    print("[OK] Configuration validated successfully")
except ValueError as e:
    print(f"[ERROR] Configuration error: {e}")

# Custom CSS for Strike Capital branding
STRIKE_CSS = """
@import url('https://fonts.googleapis.com/css2?family=Inter:wght@400;500;600;700&display=swap');

body {
    font-family: 'Inter', -apple-system, BlinkMacSystemFont, sans-serif;
    line-height: 1.6;
    color: #1a1a1a;
    max-width: 900px;
    margin: 0 auto;
    padding: 40px;
    background: #ffffff;
}

h1 {
    color: #0a0a0a;
    font-weight: 700;
    font-size: 28px;
    border-bottom: 3px solid #2563eb;
    padding-bottom: 12px;
    margin-bottom: 24px;
}

h2 {
    color: #0a0a0a;
    font-weight: 600;
    font-size: 20px;
    margin-top: 32px;
    margin-bottom: 16px;
    padding-bottom: 8px;
    border-bottom: 1px solid #e5e5e5;
}

h3 {
    color: #262626;
    font-weight: 600;
    font-size: 16px;
    margin-top: 24px;
}

p {
    margin-bottom: 16px;
}

ul, ol {
    margin-bottom: 16px;
    padding-left: 24px;
}

li {
    margin-bottom: 8px;
}

/* Table wrapper for horizontal scroll */
.table-wrapper {
    overflow-x: auto;
    margin: 16px 0;
}

table {
    width: 100%;
    min-width: 600px;
    border-collapse: collapse;
    font-size: 13px;
    display: block;
    overflow-x: auto;
}

th {
    background: #f5f5f5;
    font-weight: 600;
    text-align: left;
    padding: 10px 8px;
    border: 1px solid #e5e5e5;
    white-space: nowrap;
}

td {
    padding: 8px;
    border: 1px solid #e5e5e5;
    min-width: 80px;
}

tr:nth-child(even) {
    background: #fafafa;
}

strong {
    font-weight: 600;
}

em {
    color: #525252;
}

hr {
    border: none;
    border-top: 1px solid #e5e5e5;
    margin: 24px 0;
}

code {
    background: #f5f5f5;
    padding: 2px 6px;
    border-radius: 4px;
    font-size: 14px;
}

blockquote {
    border-left: 4px solid #2563eb;
    padding-left: 16px;
    margin: 16px 0;
    color: #525252;
}

.header-meta {
    color: #737373;
    font-size: 14px;
    margin-bottom: 24px;
}

@media print {
    body {
        padding: 20px;
    }
    h2 {
        page-break-after: avoid;
    }
}
"""


def process_pdf(pdf_file, progress=gr.Progress()):
    """
    Main processing pipeline: PDF → Extract → Store → Generate V0 → Store.

    Args:
        pdf_file: Uploaded PDF file. Either an object exposing the file path
            as ``.name`` or a plain path string; ``None`` when nothing was
            uploaded.
        progress: Gradio progress tracker. The ``gr.Progress()`` default is
            intentional; see the Gradio progress-bar documentation.

    Returns:
        Tuple of (markdown_content, html_preview, status_message). When no
        file was given or any pipeline step raises, ``markdown_content`` is
        an empty string and the other two elements carry the message.
    """
    if pdf_file is None:
        # NOTE(review): the HTML markup around this message appears to have
        # been lost in the copy under review; confirm against the canonical file.
        return "", "\nPlease upload a Harmonic PDF report.\n", "⚠️ No file uploaded"

    try:
        # Accept both a file-like upload object and a bare path string.
        pdf_path = getattr(pdf_file, "name", pdf_file)

        # Step 1: Extract data from PDF (returns extracted_data AND sentence_chunks)
        progress(0.1, desc="📄 Extracting text and chunking sentences...")

        def extraction_progress(batch, total):
            # Extraction occupies the 10%-40% band of the bar. A zero total
            # would otherwise raise ZeroDivisionError inside the callback.
            span = 0.3 * batch / total if total else 0.3
            progress(0.1 + span, desc=f"🔍 Processing batch {batch}/{total}...")

        extracted_data, sentence_chunks = extract_from_pdf(
            pdf_path,
            progress_callback=extraction_progress,
        )

        # Get company name; tolerate a missing/None "company_info" entry and
        # a missing/empty name so downstream storage always gets a real label.
        company_info = extracted_data.get("company_info") or {}
        company_name = company_info.get("name") or "Unknown Company"
        progress(0.4, desc=f"✓ Extracted data for {company_name}")

        # Step 2: Store sentence chunks in Pinecone (one vector per sentence)
        progress(
            0.45,
            desc=f"💾 Storing {len(sentence_chunks)} sentence chunks in Pinecone...",
        )
        chunks_result = store_sentence_chunks(company_name, sentence_chunks)

        # Step 3: Store extracted sections in Pinecone
        progress(0.55, desc="💾 Storing extracted sections...")
        store_result = store_extracted_data(company_name, extracted_data)

        # Step 4: Generate V0 document
        def generation_progress(step, total, section_name):
            # Generation occupies the 60%-95% band of the bar; same
            # zero-total guard as above.
            span = 0.35 * step / total if total else 0.35
            progress(0.6 + span, desc=f"✍️ Generating: {section_name}...")

        v0_document = generate_v0_document(
            extracted_data,
            company_name,
            progress_callback=generation_progress,
        )

        # Step 5: Store V0 document
        progress(0.95, desc="💾 Saving V0 document...")
        store_v0_document(company_name, v0_document)

        # Convert to HTML for preview
        html_preview = markdown2.markdown(
            v0_document,
            extras=["tables", "fenced-code-blocks", "header-ids"],
        )

        progress(1.0, desc="✓ Complete!")

        status = (
            f"✅ Successfully generated V0 for **{company_name}**\n\n"
            f"- Sentence chunks stored: {chunks_result.get('chunks_stored', 'N/A')}\n"
            f"- Sections stored: {store_result.get('sections_stored', 'N/A')}\n"
            "- Document version: v0"
        )
        return v0_document, html_preview, status

    except Exception as e:
        # Top-level UI boundary: surface the failure to the user instead of
        # crashing the handler, but keep the traceback in the server log.
        traceback.print_exc()
        error_msg = f"❌ Error: {str(e)}"
        # The second element feeds the HTML preview, so escape the exception
        # text to keep characters such as "<" from being parsed as markup.
        return "", f"{html.escape(error_msg)}\n", error_msg
", error_msg def export_to_html(markdown_content, auto_print=False): """Export markdown to styled HTML file with optional auto-print dialog.""" if not markdown_content: return None html_content = markdown2.markdown( markdown_content, extras=["tables", "fenced-code-blocks", "header-ids"] ) # Auto-print script (opens print dialog when page loads) print_script = """ """ if auto_print else "" full_html = f"""Upload a PDF to generate your V0 document...
" ) with gr.TabItem("Markdown"): markdown_output = gr.Textbox( label="Raw Markdown", lines=30, max_lines=50, show_copy_button=True ) # Event handlers generate_btn.click( fn=process_pdf, inputs=[pdf_input], outputs=[markdown_output, html_preview, status_output], show_progress=True ) export_html_btn.click( fn=export_to_html, inputs=[markdown_output], outputs=[html_download] ).then( fn=lambda x: gr.update(visible=True) if x else gr.update(visible=False), inputs=[html_download], outputs=[html_download] ) export_pdf_btn.click( fn=export_to_pdf, inputs=[markdown_output], outputs=[pdf_download] ).then( fn=lambda x: gr.update(visible=True) if x else gr.update(visible=False), inputs=[pdf_download], outputs=[pdf_download] ) gr.Markdown(""" --- *Strike Capital AI Diligence System v0.1 | Powered by GPT-5.1 + Pinecone* """) # For Hugging Face Spaces Docker deployment app.launch( server_name="0.0.0.0", server_port=7860 )