Spaces:

ObiJuanCodenobi
/

docgen

Sleeping

File size: 27,837 Bytes

import gradio as gr
import pypandoc
import os
from pdf2docx import Converter
from docx import Document
from docx.table import _Cell
from docx.shared import Inches, Pt, RGBColor
from docx.enum.text import WD_ALIGN_PARAGRAPH, WD_COLOR_INDEX
from docx.oxml.ns import qn
import json
import base64
import hashlib
import sys
import tempfile
from flask import Flask, request, jsonify, send_file
import threading
import secrets

# Run an apt-get install of TeX Live when this module is loaded. The exit
# status returned by os.system() is not checked.
# NOTE(review): no "-y" flag is passed, so apt-get may stop at its confirmation
# prompt when run without a terminal — confirm this actually installs anything.
os.system('sudo apt-get install texlive')

def ensure_pandoc_installed():
    """Make sure a pandoc binary is available, downloading one if needed.

    Asks pypandoc for the pandoc version; if that raises ``OSError`` a
    download is triggered via ``pypandoc.download_pandoc()``. Progress is
    reported with ``print``.
    """
    try:
        # Check whether pandoc is already available
        pypandoc.get_pandoc_version()
        print("Pandoc is already installed and accessible.")
    except OSError:
        # Download pandoc if it is not available yet
        print("Pandoc not found, downloading...")
        pypandoc.download_pandoc()
        print("Pandoc downloaded successfully.")

# Make sure Pandoc is installed before anything below uses it
ensure_pandoc_installed()

# Supported input formats, upper-cased and sorted alphabetically.
#
# The list is a fixed literal. A previous version wrapped it in
# `list(pypandoc.get_pandoc_formats()[0]).append('PDF') or [...]`, but
# list.append() returns None, so that expression always fell through to this
# literal; the pandoc query was dead code and is no longer executed.
#
# NOTE(review): 'PDF' is listed and then filtered out, so PDF uploads are not
# offered here even though convert_document() has a PDF branch — confirm
# whether the exclusion is intentional.
input_supported_formats = [
    name.upper()
    for name in sorted([
        'BIBLATEX', 'BIBTEX', 'BITS', 'COMMONMARK', 'COMMONMARK_X', 'CREOLE', 'CSLJSON', 'CSV',
        'DJOT', 'DOCBOOK', 'DOCX', 'DOKUWIKI', 'ENDNOTEXML', 'EPUB', 'FB2', 'GFM', 'HADDOCK',
        'HTML', 'IPYNB', 'JATS', 'JIRA', 'JSON', 'LATEX', 'MAN', 'MARKDOWN', 'MARKDOWN_GITHUB',
        'MARKDOWN_MMD', 'MARKDOWN_PHPEXTRA', 'MARKDOWN_STRICT', 'MDOC', 'MEDIAWIKI', 'MUSE',
        'NATIVE', 'ODT', 'OPML', 'ORG', 'PDF', 'POD', 'RIS', 'RST', 'RTF', 'T2T', 'TEXTILE',
        'TIKIWIKI', 'TSV', 'TWIKI', 'TYPST', 'VIMWIKI',
    ])
    if name != 'PDF'
]

# Output formats offered in the "Download As..." dropdown: upper-case names in
# alphabetical order, with PDF deliberately left out of the final list.
output_supported_formats = sorted(
    fmt.upper()
    for fmt in (
        "ANSI", "ASCIIDOC", "ASCIIDOC_LEGACY", "ASCIIDOCTOR", "BEAMER", "BIBLATEX", "BIBTEX",
        "CHUNKEDHTML", "COMMONMARK", "COMMONMARK_X", "CONTEXT", "CSLJSON", "DJOT", "DOCBOOK",
        "DOCBOOK4", "DOCBOOK5", "DOCX", "DOKUWIKI", "DZSLIDES", "EPUB", "EPUB2", "EPUB3", "FB2",
        "GFM", "HADDOCK", "HTML", "HTML4", "HTML5", "ICML", "IPYNB", "JATS", "JATS_ARCHIVING",
        "JATS_ARTICLEAUTHORING", "JATS_PUBLISHING", "JIRA", "JSON", "LATEX", "MAN", "MARKDOWN",
        "MARKDOWN_GITHUB", "MARKDOWN_MMD", "MARKDOWN_PHPEXTRA", "MARKDOWN_STRICT", "MARKUA",
        "MEDIAWIKI", "MS", "MUSE", "NATIVE", "ODT", "OPENDOCUMENT", "OPML", "ORG", "PDF",
        "PLAIN", "PPTX", "REVEALJS", "RST", "RTF", "S5", "SLIDEOUS", "SLIDY", "TEI", "TEXINFO",
        "TEXTILE", "TYPST", "XWIKI", "ZIMWIKI",
    )
    if fmt != "PDF"
)

def convert_pdf_to_docx(pdf_file):
    """Convert *pdf_file* to a DOCX file next to it and return the new path.

    The output path is the input path with its extension replaced by
    ``.docx``. All pages are converted (``start=0, end=None``).
    """
    output_docx = f"{os.path.splitext(pdf_file)[0]}.docx"
    cv = Converter(pdf_file)
    try:
        cv.convert(output_docx, start=0, end=None)
    finally:
        # Release the PDF handle held by the converter even when conversion fails.
        cv.close()
    return output_docx

def get_preview(file_path):
    ext = os.path.splitext(file_path)[1].lower()
    try:
        if ext in ['.txt', '.md', '.csv', '.json']:
            with open(file_path, 'r', encoding='utf-8', errors='ignore') as f:
                content = f.read(2000)  # Preview first 2000 chars
                return f"<pre style='max-height:300px;overflow:auto'>{content}</pre>"
        elif ext == '.pdf':
            # Show PDF inline using HTML embed
            return f"<embed src='{file_path}' type='application/pdf' width='100%' height='400px' />"
        elif ext == '.docx':
            try:
                doc = Document(file_path)
                html = ""
                # Extract header(s) with paragraphs and tables
                headers = []
                for section in doc.sections:
                    header_texts = []
                    # Paragraphs
                    for p in section.header.paragraphs:
                        if p.text.strip():
                            header_texts.append(p.text.strip())
                    # Tables
                    for table in section.header.tables:
                        for row in table.rows:
                            row_text = " ".join(cell.text.strip() for cell in row.cells if cell.text.strip())
                            if row_text:
                                header_texts.append(row_text)
                    if header_texts:
                        headers.append(" | ".join(header_texts))
                if headers:
                    html += f"<div style='font-weight:bold;font-size:1.2em;margin-bottom:8px;'>{' | '.join(headers)}</div>"
                para_count = 0
                for para in doc.paragraphs:
                    text = para.text.strip()
                    if text:
                        html += f"<p>{text}</p>"
                        para_count += 1
                        if para_count > 30:
                            html += "<p><i>Preview truncated...</i></p>"
                            break
                return f"<div style='max-height:300px;overflow:auto'>{html}</div>"
            except Exception as e:
                return f"<b>Error reading DOCX:</b> {e}"
        elif ext == '.doc':
            return f"<b>DOC file:</b> {os.path.basename(file_path)} (Preview not supported)"
        else:
            return f"<b>File:</b> {os.path.basename(file_path)} (Preview not supported)"
    except Exception as e:
        return f"<b>Error generating preview:</b> {e}"

def extract_runs(paragraph):
    """Describe every run of *paragraph* as a plain dict.

    Each dict always has ``"text"``. ``"bold"``, ``"italic"`` and
    ``"underline"`` are added (as ``True``) only when truthy on the run.
    Font size (points), font name, colour (``"color"`` for RGB, otherwise
    ``"color_theme"``) and ``"highlight"`` are added when set; colour and
    highlight values are stored via ``str()``.
    """
    described = []
    for run in paragraph.runs:
        entry = {"text": run.text}

        # Character formatting flags, recorded only when set
        for flag in ("bold", "italic", "underline"):
            if getattr(run, flag):
                entry[flag] = True

        font = run.font
        if font:
            if font.size:
                entry["font_size"] = font.size.pt
            if font.name:
                entry["font_name"] = font.name

            # Colour: an explicit RGB value wins over a theme colour
            colour = font.color
            if colour:
                if colour.rgb:
                    entry["color"] = str(colour.rgb)
                elif colour.theme_color:
                    entry["color_theme"] = str(colour.theme_color)

            # Highlight colour, when the font object exposes one
            if hasattr(font, "highlight_color") and font.highlight_color:
                entry["highlight"] = str(font.highlight_color)

        described.append(entry)
    return described

# Classify a paragraph as heading, list item or plain paragraph
def extract_paragraph_block(paragraph):
    """Return a block dict describing *paragraph*.

    The block type is derived from the style name: names starting with
    "Heading" become ``"heading"`` (level parsed from the last word,
    defaulting to 1), names containing "List" become ``"list_item"``
    (``"number"`` when the name contains "Number", else ``"bullet"``), and
    everything else is a ``"paragraph"``. Every block also carries its runs,
    its alignment (``str()`` of the paragraph alignment, or ``"left"`` when
    falsy) and the style name.
    """
    style_name = paragraph.style.name if paragraph.style else "Normal"

    if style_name.startswith("Heading"):
        try:
            heading_level = int(style_name.split()[-1])
        except ValueError:
            # e.g. a bare "Heading" style without a trailing number
            heading_level = 1
        block = {"type": "heading", "level": heading_level}
    elif "List" in style_name:
        numbered = "Number" in style_name
        block = {"type": "list_item", "list_type": "number" if numbered else "bullet"}
    else:
        block = {"type": "paragraph"}

    # Fields shared by all three block types, appended in the same key order
    block["runs"] = extract_runs(paragraph)
    block["alignment"] = str(paragraph.alignment) if paragraph.alignment else "left"
    block["style"] = style_name
    return block

# Collect paragraph blocks (with spacing) and table blocks of a container
def extract_blocks(element, output_dir, image_prefix):
    """Return block dicts for the paragraphs and tables of *element*.

    Non-blank paragraphs come first, each extended with ``space_before`` /
    ``space_after`` (points) and ``line_spacing`` when those are set;
    table blocks follow. Containers lacking ``paragraphs`` or ``tables``
    simply contribute nothing. ``output_dir`` and ``image_prefix`` are
    accepted but not used here.
    """
    collected = []

    for paragraph in getattr(element, 'paragraphs', ()):
        if not paragraph.text.strip():
            continue
        entry = extract_paragraph_block(paragraph)
        # Spacing information, copied only when present
        fmt = paragraph.paragraph_format
        if fmt:
            before = fmt.space_before
            if before:
                entry["space_before"] = before.pt
            after = fmt.space_after
            if after:
                entry["space_after"] = after.pt
            spacing = fmt.line_spacing
            if spacing:
                entry["line_spacing"] = spacing
        collected.append(entry)

    collected.extend(extract_table_block(tbl) for tbl in getattr(element, 'tables', ()))
    return collected

def extract_table_block(table):
    """Return a ``{"type": "table", "rows": ...}`` block for *table*.

    ``rows`` is a list of rows, each a list of cells, each a list of
    paragraph blocks for the cell's non-blank paragraphs.
    """
    grid = []
    for row in table.rows:
        cells_out = []
        for cell in row.cells:
            # Keyed by object identity: keeps first-seen order and drops a
            # paragraph object that shows up more than once in the same cell.
            distinct = {id(p): p for p in cell.paragraphs}
            cells_out.append(
                [extract_paragraph_block(p) for p in distinct.values() if p.text.strip()]
            )
        grid.append(cells_out)
    return {"type": "table", "rows": grid}

def extract_images_from_doc(doc, output_dir, image_prefix):
    """Write every image related to *doc* into *output_dir* and describe it.

    Each image relationship of the document part is saved as
    ``<image_prefix>_<first 8 hex chars of SHA-1>.<ext>``, where ``ext`` is
    the part after the last "/" of the content type. Returns one
    ``{"type": "image", ...}`` block per image; ``"path"`` holds the file
    name only, without *output_dir*.
    """
    image_reltype = 'http://schemas.openxmlformats.org/officeDocument/2006/relationships/image'
    found = []
    for rel in doc.part.rels.values():
        if rel.reltype != image_reltype:
            continue
        part = rel.target_part
        payload = part.blob
        digest = hashlib.sha1(payload).hexdigest()[:8]
        extension = part.content_type.split('/')[-1]
        image_id = f"{image_prefix}_{digest}"
        file_name = f"{image_id}.{extension}"
        with open(os.path.join(output_dir, file_name), 'wb') as handle:
            handle.write(payload)
        found.append({
            "type": "image",
            "image_id": image_id,
            "image_format": extension,
            "path": file_name,
        })
    return found

def add_runs_to_paragraph(paragraph, runs):
    for run_info in runs:
        run = paragraph.add_run(run_info.get("text", ""))
        if run_info.get("bold"): run.bold = True
        if run_info.get("italic"): run.italic = True
        if run_info.get("underline"): run.underline = True
        if run_info.get("font_size"): run.font.size = Pt(run_info["font_size"])
        if run_info.get("font_name"): run.font.name = run_info["font_name"]
        # Set color (RGB or theme)
        if run_info.get("color"):
            try:
                run.font.color.rgb = RGBColor.from_string(run_info["color"].replace("#", ""))
            except Exception:
                pass
        elif run_info.get("color_theme"):
            try:
                run.font.color.theme_color = int(run_info["color_theme"])
            except Exception:
                pass
        if run_info.get("highlight"):
            try:
                if run_info["highlight"].isdigit():
                    run.font.highlight_color = int(run_info["highlight"])
                else:
                    run.font.highlight_color = WD_COLOR_INDEX[run_info["highlight"]]
            except Exception:
                pass

# Rebuild heading, list, paragraph, table and image blocks inside a container
def _resolve_alignment(value):
    """Map a stored alignment value to a WD_ALIGN_PARAGRAPH member (LEFT if unknown).

    Understands lower-case names ("center") as well as the str() form of an
    enum member that extract_paragraph_block stores (e.g. "CENTER (1)").
    """
    token = str(value or "left").strip()
    token = token.split()[0].rsplit(".", 1)[-1].lower() if token else "left"
    return {
        "center": WD_ALIGN_PARAGRAPH.CENTER,
        "right": WD_ALIGN_PARAGRAPH.RIGHT,
        "justify": WD_ALIGN_PARAGRAPH.JUSTIFY,
    }.get(token, WD_ALIGN_PARAGRAPH.LEFT)


def _apply_paragraph_layout(para, block):
    """Apply alignment and spacing stored in *block* to *para*."""
    para.alignment = _resolve_alignment(block.get("alignment", "left"))
    fmt = para.paragraph_format
    if "space_before" in block:
        fmt.space_before = Pt(block["space_before"])
    if "space_after" in block:
        fmt.space_after = Pt(block["space_after"])
    if "line_spacing" in block:
        fmt.line_spacing = block["line_spacing"]


def _add_empty_heading(doc, level):
    """Create an empty heading paragraph of *level* in *doc* and return it."""
    try:
        level = int(level)
    except (TypeError, ValueError):
        level = 1
    level = min(max(level, 0), 9)  # python-docx documents 0-9 as the valid range
    if hasattr(doc, "add_heading"):
        return doc.add_heading("", level=level)
    # Containers such as table cells and header/footer parts have no
    # add_heading(); use the named heading style on a normal paragraph instead.
    return doc.add_paragraph(style="Title" if level == 0 else f"Heading {level}")


def add_block_to_doc(doc, block, image_dir):
    """Append one JSON *block* to *doc*.

    *doc* may be a Document or any container offering ``add_paragraph`` /
    ``add_table`` (table cell, header, footer). Supported block types are
    ``heading``, ``list_item``, ``paragraph``, ``table`` and ``image``;
    other types are ignored. Image files are looked up in *image_dir* and
    skipped when missing.
    """
    kind = block.get("type")
    if kind in ("heading", "list_item", "paragraph"):
        if kind == "heading":
            # Start empty: the text comes from the runs below. Passing the
            # text to add_heading() as well would write it twice.
            para = _add_empty_heading(doc, block.get("level", 1))
        elif kind == "list_item":
            style = "List Number" if block.get("list_type") == "number" else "List Bullet"
            para = doc.add_paragraph(style=style)
        else:
            para = doc.add_paragraph()
        add_runs_to_paragraph(para, block.get("runs", []))
        _apply_paragraph_layout(para, block)
    elif kind == "table":
        rows = block.get("rows", [])
        if rows:
            # Size for the widest row so ragged input cannot index past the grid
            n_cols = max(len(row) for row in rows)
            try:
                # Document.add_table() and _Cell.add_table() take no width
                table = doc.add_table(rows=len(rows), cols=n_cols)
            except TypeError:
                # Header/footer containers require an explicit width
                table = doc.add_table(rows=len(rows), cols=n_cols, width=Inches(6))
            for i, row in enumerate(rows):
                for j, cell_blocks in enumerate(row):
                    cell = table.cell(i, j)
                    for para_block in cell_blocks:
                        add_block_to_doc(cell, para_block, image_dir)
    elif kind == "image":
        rel_path = block.get("path")
        if not rel_path:
            return
        img_path = os.path.join(image_dir, rel_path)
        if os.path.exists(img_path):
            size = {}
            width = block.get("width")
            height = block.get("height")
            if width and height:
                # Stored sizes are treated as pixels at 96 per inch
                size = {"width": Inches(width / 96), "height": Inches(height / 96)}
            # Same steps Document.add_picture() performs, but available on any
            # container that can create a paragraph.
            doc.add_paragraph().add_run().add_picture(img_path, **size)

def add_blocks_to_doc(doc, blocks, image_dir):
    """Append every block in *blocks* to *doc* via ``add_block_to_doc``.

    When *doc* has ``is_header``, ``is_footer`` or ``add_paragraph``, errors
    propagate to the caller. For any other object the call is attempted
    anyway and a failing block is skipped silently.
    """
    marker_attrs = ('is_header', 'is_footer', 'add_paragraph')
    for block in blocks:
        if any(hasattr(doc, attr) for attr in marker_attrs):
            add_block_to_doc(doc, block, image_dir)
            continue
        # Unknown container type: best effort, ignore blocks it cannot take
        try:
            add_block_to_doc(doc, block, image_dir)
        except Exception:
            pass

def extract_all_sections(doc, output_dir, image_prefix):
    """Return one dict per section of *doc* holding its header/footer blocks.

    For each of the six header/footer attributes that is present and truthy
    on the section, the dict maps the attribute name to the blocks returned
    by ``extract_blocks``.
    """
    part_names = (
        "header", "first_page_header", "even_page_header",
        "footer", "first_page_footer", "even_page_footer",
    )
    described = []
    for idx, section in enumerate(doc.sections):
        entry = {}
        for name in part_names:
            part = getattr(section, name, None)
            if not part:
                continue
            entry[name] = extract_blocks(part, output_dir, f"{image_prefix}_sec{idx}_{name}")
        described.append(entry)
    return described

def convert_document(doc_file, target_format):
    import json
    from docx import Document as DocxDocument
    try:
        target_format = target_format.lower()
        orig_file_path = None
        # Handle Gradio NamedString or file-like object
        if hasattr(doc_file, 'name'):
            orig_file_path = doc_file.name
        elif isinstance(doc_file, str):
            orig_file_path = doc_file
        else:
            return None, "Error: Unsupported file type.", None
        # If the file is a PDF, convert it to DOCX first
        if orig_file_path.lower().endswith('.pdf'):
            print("Converting PDF to DOCX...")
            doc_file = convert_pdf_to_docx(orig_file_path)
            print("PDF converted to DOCX.")
            orig_file_path = doc_file
        base_name = os.path.splitext(os.path.basename(orig_file_path))[0]
        output_file = f"docgen_{base_name}.{target_format.lower()}"
        # Custom DOCX to JSON extraction
        if orig_file_path.lower().endswith('.docx') and target_format == 'json':
            doc = Document(orig_file_path)
            output_dir = os.path.dirname(output_file)
            image_prefix = base_name
            image_blocks = extract_images_from_doc(doc, output_dir, image_prefix)
            sections = extract_all_sections(doc, output_dir, image_prefix)
            body_blocks = extract_blocks(doc, output_dir, image_prefix)
            doc_json = {
                "sections": sections,
                "body": body_blocks + image_blocks,
                "metadata": {
                    "title": getattr(doc.core_properties, 'title', ''),
                    "author": getattr(doc.core_properties, 'author', ''),
                }
            }
            with open(output_file, 'w', encoding='utf-8') as f:
                json.dump(doc_json, f, ensure_ascii=False, indent=2)
        elif orig_file_path.lower().endswith('.json') and target_format == 'docx':
            # JSON to DOCX
            with open(orig_file_path, 'r', encoding='utf-8') as f:
                doc_json = json.load(f)
            doc = DocxDocument()
            image_dir = os.path.dirname(orig_file_path)
            # Sections (headers/footers)
            if "sections" in doc_json:
                # Ensure doc has enough sections
                while len(doc.sections) < len(doc_json["sections"]):
                    doc.add_section()
                for idx, sec in enumerate(doc_json["sections"]):
                    section = doc.sections[idx]
                    for htype, attr in [("header", "header"), ("first_page_header", "first_page_header"), ("even_page_header", "even_page_header"),
                                        ("footer", "footer"), ("first_page_footer", "first_page_footer"), ("even_page_footer", "even_page_footer")]:
                        if htype in sec:
                            part = getattr(section, attr, None)
                            if part:
                                # Remove all default paragraphs
                                for p in list(part.paragraphs):
                                    p._element.getparent().remove(p._element)
                                add_blocks_to_doc(part, sec[htype], image_dir)
            # Body
            if "body" in doc_json:
                add_blocks_to_doc(doc, doc_json["body"], image_dir)
            # Metadata
            if "metadata" in doc_json:
                meta = doc_json["metadata"]
                if "title" in meta:
                    doc.core_properties.title = meta["title"]
                if "author" in meta:
                    doc.core_properties.author = meta["author"]
            doc.save(output_file)
        else:
            # Use Pandoc for other conversions
            pypandoc.convert_file(
                orig_file_path,
                target_format.lower(),
                outputfile=output_file,
            )
        input_preview = get_preview(orig_file_path)
        output_preview = get_preview(output_file)
        return input_preview, output_preview, output_file
    except Exception as e:
        return f"Error: {e}", None, None

def parity_check(docx_path):
    """Round-trip *docx_path* through JSON and back and compare the extracted blocks.

    Converts DOCX -> JSON -> DOCX with ``convert_document``, extracts
    sections and body from both the original and the regenerated document,
    and prints a unified diff of whatever differs.

    Returns:
        ``True`` when the extracted structures are identical, ``False`` when
        they differ or a conversion step produced no file.
    """
    import difflib
    import pprint

    print(f"[Parity Check] Testing round-trip for: {docx_path}")
    _, _, json_out = convert_document(docx_path, 'json')
    if not json_out or not os.path.exists(json_out):
        print("Failed to produce JSON from DOCX.")
        return False
    _, _, docx_out = convert_document(json_out, 'docx')
    if not docx_out or not os.path.exists(docx_out):
        print("Failed to produce DOCX from JSON.")
        return False

    def snapshot(path):
        # Same extraction the converter uses, so both sides are compared like for like
        doc = Document(path)
        out_dir = os.path.dirname(path)
        prefix = os.path.splitext(os.path.basename(path))[0]
        return {
            "sections": extract_all_sections(doc, out_dir, prefix),
            "body": extract_blocks(doc, out_dir, prefix),
        }

    def print_diff(before, after):
        diff = difflib.unified_diff(
            pprint.pformat(before, width=120).splitlines(),
            pprint.pformat(after, width=120).splitlines(),
            fromfile='original', tofile='roundtrip', lineterm='',
        )
        print('\n'.join(diff))

    orig = snapshot(docx_path)
    roundtrip = snapshot(docx_out)
    if orig == roundtrip:
        print("[Parity Check] PASS: Round-trip blocks are identical!")
        return True

    print("[Parity Check] FAIL: Differences found.")
    # zip() below stops at the shorter list, so report a count mismatch explicitly
    if len(orig["sections"]) != len(roundtrip["sections"]):
        print(f"Section count mismatch: {len(orig['sections'])} original vs "
              f"{len(roundtrip['sections'])} roundtrip")
    # Compare per section
    for idx, (orig_sec, round_sec) in enumerate(zip(orig["sections"], roundtrip["sections"])):
        if orig_sec != round_sec:
            print(f"Section {idx} header/footer mismatch:")
            print_diff(orig_sec, round_sec)
    if orig["body"] != roundtrip["body"]:
        print("Body mismatch:")
        print_diff(orig["body"], roundtrip["body"])
    return False

# Gradio UI: upload a document, preview it as JSON, then pick a download format.
with gr.Blocks(css="footer {visibility: hidden}") as demo:
    gr.Markdown("# Document Format Converter\nUpload a document and preview as JSON. Select a format to download in another format.")
    with gr.Row():
        with gr.Column():
            input_file = gr.File(label="Upload Document", file_types=[f'.{ext.lower()}' for ext in input_supported_formats])
            input_preview = gr.HTML(label="JSON Preview")
        with gr.Column():
            output_format = gr.Dropdown(label="Download As...", choices=output_supported_formats, value="DOCX")
            format_label = gr.Markdown("Previewing as: DOCX")
            output_preview = gr.HTML(label="Output Preview")
            output_file = gr.File(label="Download Converted Document", visible=True)
    json_state = gr.State()
    orig_file_state = gr.State()

    def _pre_block(text):
        """Wrap *text* in the scrollable <pre> used by the previews, HTML-escaped."""
        import html
        return f"<pre style='max-height:300px;overflow:auto'>{html.escape(str(text))}</pre>"

    def upload_and_preview(doc_file):
        """Convert the upload to JSON; return (preview HTML, JSON text, source path)."""
        # The upload may arrive as a path string or as an object with .name
        source_path = getattr(doc_file, "name", doc_file)
        first, second, json_path = convert_document(doc_file, "json")
        # Handle conversion failure
        if not json_path or not os.path.exists(json_path):
            # On failure convert_document puts its message in one of the first two slots
            detail = (first or second) if not json_path else None
            return _pre_block(detail or "Error converting document to JSON."), "", source_path
        # Read and preview JSON content
        try:
            with open(json_path, "r", encoding="utf-8") as f:
                json_content = f.read()
        except Exception as e:
            return _pre_block(f"Error reading JSON: {e}"), "", source_path
        # Escaped: the JSON carries text taken from the uploaded document
        return _pre_block(json_content[:4000]), json_content, source_path

    def convert_and_preview(orig_file_path, output_format):
        """Convert the uploaded file to *output_format*; return (label, preview HTML, path)."""
        label = f"Previewing as: {output_format}"
        if not orig_file_path:
            return label, _pre_block("Upload a document first."), None
        first, second, out_path = convert_document(orig_file_path, output_format.lower())
        if not out_path:
            # Show the conversion error instead of previewing a missing file
            return label, _pre_block(first or second or "Error converting document."), None
        return label, get_preview(out_path), out_path

    input_file.upload(upload_and_preview, inputs=input_file, outputs=[input_preview, json_state, orig_file_state])
    output_format.change(convert_and_preview, inputs=[orig_file_state, output_format], outputs=[format_label, output_preview, output_file])

if __name__ == "__main__":
    if len(sys.argv) == 3 and sys.argv[1] == "--parity-check":
        # Exit status mirrors the check result so callers can detect a failure
        sys.exit(0 if parity_check(sys.argv[2]) else 1)

    # Use the key from the environment when set; otherwise generate one for this run.
    API_KEY = os.environ.get('API_KEY')
    if API_KEY:
        # A configured secret is not echoed into the logs
        print("API key loaded from the API_KEY environment variable.")
    else:
        API_KEY = secrets.token_urlsafe(32)
        # A generated key has to be shown once, or nobody could call the API
        print(f"API Key: {API_KEY}")

    # Create Flask app for API endpoints
    app = Flask(__name__)

    def check_api_key():
        """Return True when the request's X-API-Key header matches API_KEY."""
        provided_key = request.headers.get('X-API-Key')
        if not provided_key:
            return False
        # Constant-time comparison; bytes so non-ASCII input cannot raise
        return secrets.compare_digest(provided_key.encode('utf-8'), API_KEY.encode('utf-8'))

    @app.route('/api/docx-to-json', methods=['POST'])
    def api_docx_to_json():
        """Accept a DOCX upload (form field "file") and return its JSON representation."""
        # Check API key
        if not check_api_key():
            return jsonify({"error": "Invalid or missing API key"}), 401

        if 'file' not in request.files:
            return jsonify({"error": "No file part"}), 400

        file = request.files['file']
        if not file.filename:
            return jsonify({"error": "No selected file"}), 400

        if not file.filename.lower().endswith('.docx'):
            return jsonify({"error": "File must be a DOCX document"}), 400

        # Keep only the final path component of the client-supplied name so it
        # cannot point outside the temporary directory.
        safe_name = os.path.basename(file.filename.replace('\\', '/'))

        try:
            # The upload lives in a temporary directory that is removed afterwards
            with tempfile.TemporaryDirectory() as temp_dir:
                file_path = os.path.join(temp_dir, safe_name)
                file.save(file_path)

                # Convert to JSON
                _, _, json_path = convert_document(file_path, "json")

                if not json_path or not os.path.exists(json_path):
                    return jsonify({"error": "Error converting document to JSON"}), 500

                # Read JSON content
                with open(json_path, "r", encoding="utf-8") as f:
                    json_content = json.load(f)

            return jsonify(json_content)
        except Exception as e:
            return jsonify({"error": str(e)}), 500

    @app.route('/api/json-to-docx', methods=['POST'])
    def api_json_to_docx():
        """Accept a JSON document body and return the generated DOCX as a download."""
        # Check API key
        if not check_api_key():
            return jsonify({"error": "Invalid or missing API key"}), 401

        if not request.is_json:
            return jsonify({"error": "Request must be JSON"}), 400

        try:
            # Save the JSON to a temporary file that is removed afterwards
            with tempfile.TemporaryDirectory() as temp_dir:
                json_path = os.path.join(temp_dir, "document.json")

                with open(json_path, "w", encoding="utf-8") as f:
                    json.dump(request.json, f)

                # Convert to DOCX
                _, _, docx_path = convert_document(json_path, "docx")

                if not docx_path or not os.path.exists(docx_path):
                    return jsonify({"error": "Error converting JSON to DOCX"}), 500

                # Absolute path, so the lookup does not depend on how Flask
                # resolves a relative one.
                return send_file(
                    os.path.abspath(docx_path),
                    as_attachment=True,
                    download_name="converted.docx",
                )
        except Exception as e:
            return jsonify({"error": str(e)}), 500

    # Run both Gradio and Flask
    def run_flask():
        app.run(host='0.0.0.0', port=5000)

    # Start Flask in a separate daemon thread so it ends with the main process
    flask_thread = threading.Thread(target=run_flask)
    flask_thread.daemon = True
    flask_thread.start()

    # Start Gradio
    demo.launch(share=True)