import re
from typing import Optional

import gradio as gr
from docx import Document

# Patterns used to recognise heading-like paragraphs; compiled once because
# they are applied to every paragraph of the uploaded document.
_MARKDOWN_PREFIX_RE = re.compile(r"^#+\s*")
_MARKDOWN_HEADING_RE = re.compile(r"^#+\s+")
_SEGMENT_HEADING_RE = re.compile(r"(segment\s*\d+)")
_STAGE_DIRECTION_RE = re.compile(r"^\*\[.*\]\*$")
_STAGE_WRAPPER_RE = re.compile(r"^\*\[|\]\*$")

# A paragraph ending in ":" counts as a title only if it has at most this
# many whitespace-separated words.
_MAX_COLON_TITLE_WORDS = 6

# Display colour associated with each heading tag.
_TAG_COLORS = {
    "docx": "#1f77b4",
    "markdown": "#2ca02c",
    "segment": "#ff7f0e",
    "colon": "#9467bd",
    "stage": "#d62728",
}


def clean_markdown_heading(text: str) -> str:
    """Return *text* without its leading ``#`` markers and surrounding whitespace."""
    return _MARKDOWN_PREFIX_RE.sub("", text).strip()


def get_color(tag: str) -> str:
    """Return the colour for a heading *tag*, or ``"black"`` for unknown tags.

    NOTE(review): nothing in this file calls this helper; the removed-headings
    output is currently plain text. Confirm whether coloured output is wanted.
    """
    return _TAG_COLORS.get(tag, "black")


def _classify_heading(text: str, style_name: str) -> Optional[tuple[str, str]]:
    """Classify one stripped, non-empty paragraph as a heading, if it is one.

    Args:
        text: Paragraph text with surrounding whitespace already removed.
        style_name: Name of the paragraph's Word style (any case).

    Returns:
        ``(display_text, tag)`` when the paragraph is heading-like, where
        ``tag`` is one of ``"markdown"``, ``"stage"``, ``"docx"``,
        ``"segment"`` or ``"colon"``; ``None`` for ordinary body text.
    """
    # The order of these checks decides which tag wins when several rules
    # match the same paragraph.
    if _MARKDOWN_HEADING_RE.match(text):
        return clean_markdown_heading(text), "markdown"
    if _STAGE_DIRECTION_RE.match(text):
        # Drop the "*[" ... "]*" wrapper and keep only the direction itself.
        return _STAGE_WRAPPER_RE.sub("", text).strip(), "stage"
    if "heading" in style_name.lower():
        return text, "docx"
    if _SEGMENT_HEADING_RE.match(text.lower()):
        return text, "segment"
    if text.endswith(":") and len(text.split()) <= _MAX_COLON_TITLE_WORDS:
        return text, "colon"
    return None


def process_doc(file):
    """Split an uploaded DOCX into removed headings and remaining body text.

    Args:
        file: The value delivered by the upload component: ``None`` when
            nothing was uploaded, otherwise either an object exposing the
            file path as ``.name`` or the path string itself.

    Returns:
        A ``(headings_output, cleaned_text)`` tuple of strings. The first
        holds the removed headings separated by blank lines (or a
        "no headings" notice); the second holds the kept paragraphs
        separated by blank lines.
    """
    removed_headings = []
    cleaned_paragraphs = []

    if file is not None:
        # Accept both an object with a ``.name`` path and a bare path string.
        path = getattr(file, "name", file)
        doc = Document(path)

        for para in doc.paragraphs:
            text = para.text.strip()
            if not text:
                continue

            # Treat a missing style or style name as "no style" rather than
            # failing on ``None.lower()``.
            style_name = getattr(para.style, "name", None) or ""

            heading = _classify_heading(text, style_name)
            if heading is None:
                cleaned_paragraphs.append(text)
            else:
                removed_headings.append(heading)

    cleaned_text = "\n\n".join(cleaned_paragraphs)

    if removed_headings:
        headings_output = "\n\n".join(text for text, _tag in removed_headings)
    else:
        headings_output = " 🔴 No headings found 🔴 "

    return headings_output, cleaned_text


# ✅ Custom CSS targeting only the cleaned output textbox
custom_css = """
#cleaned_output textarea {
    background-color: black !important;
    color: white !important;
}
"""

with gr.Blocks(theme=gr.themes.Soft(), css=custom_css) as app:
    # Empty side columns centre the title in the middle column.
    with gr.Row():
        with gr.Column(scale=1):
            pass
        with gr.Column(scale=2):
            gr.Markdown("## Upload Docx Here to Remove Titles")
        with gr.Column(scale=1):
            pass

    # Same three-column layout to centre the upload controls.
    with gr.Row():
        with gr.Column(scale=1):
            pass
        with gr.Column(scale=2):
            file_input = gr.File(
                file_types=[".docx"],
                label="📤 Upload DOCX File",
                height=60,
            )
            process_btn = gr.Button("🚀 Click to Process", variant="primary")
        with gr.Column(scale=1):
            pass

    gr.Markdown("---")

    with gr.Row():
        with gr.Column():
            gr.Markdown("### ===========-------> Removed Headings <-------===========")
            headings_output = gr.Markdown()
        with gr.Column():
            # ✅ elem_id added to target this textbox specifically
            cleaned_output = gr.Textbox(
                lines=25,
                elem_id="cleaned_output",
                buttons=["copy"],
            )

    process_btn.click(
        fn=process_doc,
        inputs=[file_input],
        outputs=[headings_output, cleaned_output],
    )

app.launch()