# docgen/app.py — Document Format Converter
# Author: Juan Palomino
# Deploy commit: 225be25
import base64
import hashlib
import html
import json
import os
import secrets
import subprocess
import sys
import tempfile
import threading

import gradio as gr
import pypandoc
from docx import Document
from docx.enum.text import WD_ALIGN_PARAGRAPH, WD_COLOR_INDEX
from docx.oxml.ns import qn
from docx.shared import Inches, Pt, RGBColor
from docx.table import _Cell
from flask import Flask, jsonify, request, send_file
from pdf2docx import Converter
# System dependency: a LaTeX toolchain is needed for pandoc's PDF output.
# Fix: os.system ignored the exit status and `apt-get install` without -y
# aborts in a non-interactive container; run non-interactively via subprocess.
subprocess.run(['sudo', 'apt-get', 'install', '-y', 'texlive'], check=False)


def ensure_pandoc_installed():
    """Make sure a pandoc binary is available, downloading one if necessary."""
    try:
        # Check whether pandoc is already present and on PATH.
        pypandoc.get_pandoc_version()
        print("Pandoc is already installed and accessible.")
    except OSError:
        # Download pandoc if it is not found.
        print("Pandoc not found, downloading...")
        pypandoc.download_pandoc()
        print("Pandoc downloaded successfully.")


# Make sure pandoc is installed before any conversion is attempted.
ensure_pandoc_installed()
# Daftar format yang didukung
input_supported_formats = [data.upper() for data in sorted(list(pypandoc.get_pandoc_formats()[0]).append('PDF') or [
'BIBLATEX', 'BIBTEX', 'BITS', 'COMMONMARK', 'COMMONMARK_X', 'CREOLE', 'CSLJSON', 'CSV',
'DJOT', 'DOCBOOK', 'DOCX', 'DOKUWIKI', 'ENDNOTEXML', 'EPUB', 'FB2', 'GFM', 'HADDOCK',
'HTML', 'IPYNB', 'JATS', 'JIRA', 'JSON', 'LATEX', 'MAN', 'MARKDOWN', 'MARKDOWN_GITHUB',
'MARKDOWN_MMD', 'MARKDOWN_PHPEXTRA', 'MARKDOWN_STRICT', 'MDOC', 'MEDIAWIKI', 'MUSE',
'NATIVE', 'ODT', 'OPML', 'ORG', 'PDF', 'POD', 'RIS', 'RST', 'RTF', 'T2T', 'TEXTILE',
'TIKIWIKI', 'TSV', 'TWIKI', 'TYPST', 'VIMWIKI'
]) if data not in ['PDF']]
# Formats offered in the "Download As..." dropdown. PDF is excluded because
# writing PDF requires a full LaTeX toolchain and is not always available.
_OUTPUT_FORMAT_NAMES = (
    "ANSI", "ASCIIDOC", "ASCIIDOC_LEGACY", "ASCIIDOCTOR", "BEAMER", "BIBLATEX", "BIBTEX", "CHUNKEDHTML",
    "COMMONMARK", "COMMONMARK_X", "CONTEXT", "CSLJSON", "DJOT", "DOCBOOK", "DOCBOOK4", "DOCBOOK5",
    "DOCX", "DOKUWIKI", "DZSLIDES", "EPUB", "EPUB2", "EPUB3", "FB2", "GFM", "HADDOCK", "HTML",
    "HTML4", "HTML5", "ICML", "IPYNB", "JATS", "JATS_ARCHIVING", "JATS_ARTICLEAUTHORING",
    "JATS_PUBLISHING", "JIRA", "JSON", "LATEX", "MAN", "MARKDOWN", "MARKDOWN_GITHUB",
    "MARKDOWN_MMD", "MARKDOWN_PHPEXTRA", "MARKDOWN_STRICT", "MARKUA", "MEDIAWIKI", "MS",
    "MUSE", "NATIVE", "ODT", "OPENDOCUMENT", "OPML", "ORG", "PDF", "PLAIN", "PPTX", "REVEALJS",
    "RST", "RTF", "S5", "SLIDEOUS", "SLIDY", "TEI", "TEXINFO", "TEXTILE", "TYPST", "XWIKI", "ZIMWIKI"
)
output_supported_formats = sorted(
    name.upper() for name in _OUTPUT_FORMAT_NAMES if name != "PDF"
)
def convert_pdf_to_docx(pdf_file):
    """Convert a PDF to a DOCX file written next to the source file.

    Args:
        pdf_file: Path to the source PDF.

    Returns:
        Path of the generated ``.docx`` file.
    """
    output_docx = f"{os.path.splitext(pdf_file)[0]}.docx"
    cv = Converter(pdf_file)
    try:
        # start=0 / end=None converts every page.
        cv.convert(output_docx, start=0, end=None)
    finally:
        # Fix: the converter was never closed, leaking the open PDF handle.
        cv.close()
    return output_docx
def get_preview(file_path):
    """Build a small HTML preview fragment for *file_path* by extension.

    Never raises: errors and unsupported types are reported inside the
    returned HTML fragment.
    """
    ext = os.path.splitext(file_path)[1].lower()
    try:
        if ext in ['.txt', '.md', '.csv', '.json']:
            with open(file_path, 'r', encoding='utf-8', errors='ignore') as f:
                content = f.read(2000)  # Preview only the first 2000 chars.
            # Fix: escape the content so markup inside the file cannot break
            # (or inject into) the preview HTML.
            return f"<pre style='max-height:300px;overflow:auto'>{html.escape(content)}</pre>"
        elif ext == '.pdf':
            # Show the PDF inline via an HTML embed element.
            return f"<embed src='{file_path}' type='application/pdf' width='100%' height='400px' />"
        elif ext == '.docx':
            try:
                doc = Document(file_path)
                # Note: accumulator renamed from `html` to avoid shadowing the
                # stdlib html module used for escaping.
                parts = []
                # Collect header paragraph text and header-table rows per section.
                headers = []
                for section in doc.sections:
                    header_texts = []
                    for p in section.header.paragraphs:
                        if p.text.strip():
                            header_texts.append(html.escape(p.text.strip()))
                    for table in section.header.tables:
                        for row in table.rows:
                            row_text = " ".join(
                                html.escape(cell.text.strip())
                                for cell in row.cells if cell.text.strip()
                            )
                            if row_text:
                                header_texts.append(row_text)
                    if header_texts:
                        headers.append(" | ".join(header_texts))
                if headers:
                    parts.append(
                        f"<div style='font-weight:bold;font-size:1.2em;margin-bottom:8px;'>{' | '.join(headers)}</div>"
                    )
                # Body paragraphs, truncated after 30 to keep the preview small.
                para_count = 0
                for para in doc.paragraphs:
                    text = para.text.strip()
                    if text:
                        parts.append(f"<p>{html.escape(text)}</p>")
                        para_count += 1
                        if para_count > 30:
                            parts.append("<p><i>Preview truncated...</i></p>")
                            break
                return f"<div style='max-height:300px;overflow:auto'>{''.join(parts)}</div>"
            except Exception as e:
                return f"<b>Error reading DOCX:</b> {e}"
        elif ext == '.doc':
            return f"<b>DOC file:</b> {os.path.basename(file_path)} (Preview not supported)"
        else:
            return f"<b>File:</b> {os.path.basename(file_path)} (Preview not supported)"
    except Exception as e:
        return f"<b>Error generating preview:</b> {e}"
def extract_runs(paragraph):
    """Serialize a paragraph's runs into plain dicts (text plus formatting).

    Only formatting that is explicitly set on a run is recorded, so the dicts
    stay minimal for JSON output.
    """
    serialized = []
    for run in paragraph.runs:
        info = {"text": run.text}
        for flag in ("bold", "italic", "underline"):
            if getattr(run, flag):
                info[flag] = True
        font = run.font
        if font:
            if font.size:
                info["font_size"] = font.size.pt
            if font.name:
                info["font_name"] = font.name
            # Font color: an explicit RGB value wins over a theme color.
            if font.color:
                if font.color.rgb:
                    info["color"] = str(font.color.rgb)
                elif font.color.theme_color:
                    info["color_theme"] = str(font.color.theme_color)
            # Highlight color (attribute may be absent on some objects).
            if getattr(font, "highlight_color", None):
                info["highlight"] = str(font.highlight_color)
        serialized.append(info)
    return serialized
# Classify a paragraph (heading / list item / normal) and serialize it.
def extract_paragraph_block(paragraph):
    """Return a dict block describing *paragraph*, keyed off its style name."""
    style_name = paragraph.style.name if paragraph.style else "Normal"
    alignment = str(paragraph.alignment) if paragraph.alignment else "left"
    common = {
        "runs": extract_runs(paragraph),
        "alignment": alignment,
        "style": style_name,
    }
    if style_name.startswith("Heading"):
        # "Heading 2" -> level 2; anything unparseable falls back to level 1.
        try:
            level = int(style_name.split()[-1])
        except Exception:
            level = 1
        return {"type": "heading", "level": level, **common}
    if "List" in style_name:
        list_kind = "number" if "Number" in style_name else "bullet"
        return {"type": "list_item", "list_type": list_kind, **common}
    return {"type": "paragraph", **common}
# Extract paragraph/table blocks (with paragraph spacing) from any container
# exposing .paragraphs / .tables (document body, header, footer, cell).
def extract_blocks(element, output_dir, image_prefix):
    """Serialize *element*'s paragraphs and tables into a list of block dicts.

    output_dir / image_prefix are accepted for interface symmetry with the
    image extractor; they are not used here.
    """
    blocks = []
    for para in getattr(element, 'paragraphs', []):
        if not para.text.strip():
            continue  # Skip visually empty paragraphs.
        block = extract_paragraph_block(para)
        fmt = para.paragraph_format
        if fmt:
            # Record explicit spacing so it can be restored on round-trip.
            if fmt.space_before:
                block["space_before"] = fmt.space_before.pt
            if fmt.space_after:
                block["space_after"] = fmt.space_after.pt
            if fmt.line_spacing:
                block["line_spacing"] = fmt.line_spacing
        blocks.append(block)
    for table in getattr(element, 'tables', []):
        blocks.append(extract_table_block(table))
    return blocks
def extract_table_block(table):
    """Serialize a table into nested lists of paragraph blocks (rows -> cells)."""
    serialized_rows = []
    for row in table.rows:
        serialized_cells = []
        for cell in row.cells:
            # python-docx can yield the same paragraph object repeatedly for
            # merged cells; deduplicate by object identity.
            unique, seen_ids = [], set()
            for para in cell.paragraphs:
                if id(para) not in seen_ids:
                    seen_ids.add(id(para))
                    unique.append(para)
            serialized_cells.append(
                [extract_paragraph_block(p) for p in unique if p.text.strip()]
            )
        serialized_rows.append(serialized_cells)
    return {"type": "table", "rows": serialized_rows}
def extract_images_from_doc(doc, output_dir, image_prefix):
    """Save every embedded image of *doc* into *output_dir*.

    Filenames are content-addressed with a sha1 prefix so identical images
    collapse to the same file. Returns one image block dict per relationship.
    """
    image_rel_type = 'http://schemas.openxmlformats.org/officeDocument/2006/relationships/image'
    blocks = []
    for rel in doc.part.rels.values():
        if rel.reltype != image_rel_type:
            continue
        blob = rel.target_part.blob
        digest = hashlib.sha1(blob).hexdigest()[:8]
        extension = rel.target_part.content_type.split('/')[-1]
        image_id = f"{image_prefix}_{digest}"
        filename = f"{image_id}.{extension}"
        with open(os.path.join(output_dir, filename), 'wb') as out:
            out.write(blob)
        blocks.append({
            "type": "image",
            "image_id": image_id,
            "image_format": extension,
            "path": filename,
        })
    return blocks
def add_runs_to_paragraph(paragraph, runs):
    """Append serialized run dicts (as produced by extract_runs) to *paragraph*."""
    for info in runs:
        run = paragraph.add_run(info.get("text", ""))
        if info.get("bold"):
            run.bold = True
        if info.get("italic"):
            run.italic = True
        if info.get("underline"):
            run.underline = True
        if info.get("font_size"):
            run.font.size = Pt(info["font_size"])
        if info.get("font_name"):
            run.font.name = info["font_name"]
        # Restore the font color: an explicit RGB value wins over a theme index.
        if info.get("color"):
            try:
                run.font.color.rgb = RGBColor.from_string(info["color"].replace("#", ""))
            except Exception:
                pass  # Best effort: ignore malformed color strings.
        elif info.get("color_theme"):
            try:
                run.font.color.theme_color = int(info["color_theme"])
            except Exception:
                pass
        # Highlight may have been stored as a numeric index or an enum name.
        if info.get("highlight"):
            try:
                if info["highlight"].isdigit():
                    run.font.highlight_color = int(info["highlight"])
                else:
                    run.font.highlight_color = WD_COLOR_INDEX[info["highlight"]]
            except Exception:
                pass
# Add heading and list support
def add_block_to_doc(doc, block, image_dir):
    """Render one serialized block dict into *doc*.

    *doc* may be a Document, a table cell, or a header/footer part — anything
    exposing the add_heading/add_paragraph/add_table/add_picture methods the
    block type needs. *image_dir* is where image blocks resolve their files.
    """
    if block["type"] == "heading":
        level = block.get("level", 1)
        text = "".join([r.get("text", "") for r in block.get("runs", [])])
        # NOTE(review): add_heading already inserts *text*, and the same runs
        # are appended again below — headings may end up with duplicated text;
        # confirm against a round-trip sample.
        para = doc.add_heading(text, level=level)
        add_runs_to_paragraph(para, block.get("runs", []))
        align = block.get("alignment", "left")
        if align == "center": para.alignment = WD_ALIGN_PARAGRAPH.CENTER
        elif align == "right": para.alignment = WD_ALIGN_PARAGRAPH.RIGHT
        else: para.alignment = WD_ALIGN_PARAGRAPH.LEFT
        # Spacing
        if "space_before" in block: para.paragraph_format.space_before = Pt(block["space_before"])
        if "space_after" in block: para.paragraph_format.space_after = Pt(block["space_after"])
        if "line_spacing" in block: para.paragraph_format.line_spacing = block["line_spacing"]
    elif block["type"] == "list_item":
        # Map the serialized list type back to a built-in Word list style.
        style = "List Number" if block.get("list_type") == "number" else "List Bullet"
        para = doc.add_paragraph(style=style)
        add_runs_to_paragraph(para, block.get("runs", []))
        align = block.get("alignment", "left")
        if align == "center": para.alignment = WD_ALIGN_PARAGRAPH.CENTER
        elif align == "right": para.alignment = WD_ALIGN_PARAGRAPH.RIGHT
        else: para.alignment = WD_ALIGN_PARAGRAPH.LEFT
        if "space_before" in block: para.paragraph_format.space_before = Pt(block["space_before"])
        if "space_after" in block: para.paragraph_format.space_after = Pt(block["space_after"])
        if "line_spacing" in block: para.paragraph_format.line_spacing = block["line_spacing"]
    elif block["type"] == "paragraph":
        para = doc.add_paragraph()
        add_runs_to_paragraph(para, block.get("runs", []))
        align = block.get("alignment", "left")
        if align == "center": para.alignment = WD_ALIGN_PARAGRAPH.CENTER
        elif align == "right": para.alignment = WD_ALIGN_PARAGRAPH.RIGHT
        else: para.alignment = WD_ALIGN_PARAGRAPH.LEFT
        if "space_before" in block: para.paragraph_format.space_before = Pt(block["space_before"])
        if "space_after" in block: para.paragraph_format.space_after = Pt(block["space_after"])
        if "line_spacing" in block: para.paragraph_format.line_spacing = block["line_spacing"]
    elif block["type"] == "table":
        rows = block.get("rows", [])
        if rows:
            # Size the table to the page width when available.
            try:
                section = doc.sections[0]
                table_width = section.page_width
            except Exception:
                table_width = Inches(6)
            # NOTE(review): python-docx Document.add_table() takes (rows, cols,
            # style) — only _Cell.add_table accepts a width argument, so this
            # call may raise TypeError for document-level tables; confirm.
            table = doc.add_table(rows=len(rows), cols=len(rows[0]), width=table_width)
            for i, row in enumerate(rows):
                for j, cell_blocks in enumerate(row):
                    cell = table.cell(i, j)
                    # Recurse: render each serialized paragraph into the cell.
                    for para_block in cell_blocks:
                        add_block_to_doc(cell, para_block, image_dir)
    elif block["type"] == "image":
        img_path = os.path.join(image_dir, block["path"])
        width = block.get("width")
        height = block.get("height")
        if os.path.exists(img_path):
            if width and height:
                # Presumably width/height are pixels at 96 DPI — TODO confirm.
                doc.add_picture(img_path, width=Inches(width/96), height=Inches(height/96))
            else:
                doc.add_picture(img_path)
def add_blocks_to_doc(doc, blocks, image_dir):
    """Render a list of serialized blocks into *doc* (document, cell, header or footer)."""
    for block in blocks:
        # If doc is a header/footer, use add_paragraph directly
        # NOTE(review): the hasattr chain ends with 'add_paragraph', which every
        # python-docx block container exposes, so the else branch below appears
        # to be dead code — confirm before removing.
        if hasattr(doc, 'is_header') or hasattr(doc, 'is_footer') or hasattr(doc, 'add_paragraph'):
            add_block_to_doc(doc, block, image_dir)
        else:
            # If doc is a SectionHeader or SectionFooter (python-docx), use .add_paragraph()
            # Best-effort: any rendering failure here is deliberately swallowed.
            try:
                add_block_to_doc(doc, block, image_dir)
            except Exception:
                pass
def extract_all_sections(doc, output_dir, image_prefix):
    """Serialize the headers and footers of every section in *doc*.

    Returns one dict per section mapping header/footer kind to its block list;
    kinds that the section does not define are simply omitted.
    """
    kinds = (
        "header", "first_page_header", "even_page_header",
        "footer", "first_page_footer", "even_page_footer",
    )
    serialized = []
    for idx, section in enumerate(doc.sections):
        entry = {}
        for kind in kinds:
            part = getattr(section, kind, None)
            if part:
                entry[kind] = extract_blocks(part, output_dir, f"{image_prefix}_sec{idx}_{kind}")
        serialized.append(entry)
    return serialized
def convert_document(doc_file, target_format):
    """Convert *doc_file* to *target_format* and build HTML previews.

    Returns a tuple ``(input_preview_html, output_preview_html, output_path)``.
    NOTE(review): on failure the error string's position is inconsistent —
    the unsupported-input case puts it in the second slot, the generic
    exception handler in the first; callers unpacking positionally should be
    checked.
    """
    import json
    from docx import Document as DocxDocument
    try:
        target_format = target_format.lower()
        orig_file_path = None
        # Handle Gradio NamedString or file-like object
        if hasattr(doc_file, 'name'):
            orig_file_path = doc_file.name
        elif isinstance(doc_file, str):
            orig_file_path = doc_file
        else:
            return None, "Error: Unsupported file type.", None
        # If the file is a PDF, convert it to DOCX first
        if orig_file_path.lower().endswith('.pdf'):
            print("Converting PDF to DOCX...")
            doc_file = convert_pdf_to_docx(orig_file_path)
            print("PDF converted to DOCX.")
            orig_file_path = doc_file
        base_name = os.path.splitext(os.path.basename(orig_file_path))[0]
        # The output file lands in the current working directory.
        output_file = f"docgen_{base_name}.{target_format.lower()}"
        # Custom DOCX to JSON extraction
        if orig_file_path.lower().endswith('.docx') and target_format == 'json':
            doc = Document(orig_file_path)
            output_dir = os.path.dirname(output_file)  # '' -> current directory
            image_prefix = base_name
            image_blocks = extract_images_from_doc(doc, output_dir, image_prefix)
            sections = extract_all_sections(doc, output_dir, image_prefix)
            body_blocks = extract_blocks(doc, output_dir, image_prefix)
            # Images are appended after the text blocks in the serialized body.
            doc_json = {
                "sections": sections,
                "body": body_blocks + image_blocks,
                "metadata": {
                    "title": getattr(doc.core_properties, 'title', ''),
                    "author": getattr(doc.core_properties, 'author', ''),
                }
            }
            with open(output_file, 'w', encoding='utf-8') as f:
                json.dump(doc_json, f, ensure_ascii=False, indent=2)
        elif orig_file_path.lower().endswith('.json') and target_format == 'docx':
            # JSON to DOCX
            with open(orig_file_path, 'r', encoding='utf-8') as f:
                doc_json = json.load(f)
            doc = DocxDocument()
            image_dir = os.path.dirname(orig_file_path)
            # Sections (headers/footers)
            if "sections" in doc_json:
                # Ensure doc has enough sections
                while len(doc.sections) < len(doc_json["sections"]):
                    doc.add_section()
                for idx, sec in enumerate(doc_json["sections"]):
                    section = doc.sections[idx]
                    for htype, attr in [("header", "header"), ("first_page_header", "first_page_header"), ("even_page_header", "even_page_header"),
                                        ("footer", "footer"), ("first_page_footer", "first_page_footer"), ("even_page_footer", "even_page_footer")]:
                        if htype in sec:
                            part = getattr(section, attr, None)
                            if part:
                                # Remove all default paragraphs
                                for p in list(part.paragraphs):
                                    p._element.getparent().remove(p._element)
                                add_blocks_to_doc(part, sec[htype], image_dir)
            # Body
            if "body" in doc_json:
                add_blocks_to_doc(doc, doc_json["body"], image_dir)
            # Metadata
            if "metadata" in doc_json:
                meta = doc_json["metadata"]
                if "title" in meta:
                    doc.core_properties.title = meta["title"]
                if "author" in meta:
                    doc.core_properties.author = meta["author"]
            doc.save(output_file)
        else:
            # Use Pandoc for other conversions
            pypandoc.convert_file(
                orig_file_path,
                target_format.lower(),
                outputfile=output_file,
            )
        input_preview = get_preview(orig_file_path)
        output_preview = get_preview(output_file)
        return input_preview, output_preview, output_file
    except Exception as e:
        return f"Error: {e}", None, None
def parity_check(docx_path):
    """Round-trip *docx_path* through DOCX -> JSON -> DOCX and compare blocks.

    Prints a PASS/FAIL report (with unified diffs per mismatching section and
    for the body) and returns True only when the serialized blocks match.
    """
    import tempfile  # NOTE(review): unused here — candidate for removal.
    print(f"[Parity Check] Testing round-trip for: {docx_path}")
    class FileLike:  # Fake file-like for CLI
        def __init__(self, name): self.name = name
    _, _, json_out = convert_document(FileLike(docx_path), 'json')
    if not json_out or not os.path.exists(json_out):
        print("Failed to produce JSON from DOCX.")
        return False
    _, _, docx_out = convert_document(FileLike(json_out), 'docx')
    if not docx_out or not os.path.exists(docx_out):
        print("Failed to produce DOCX from JSON.")
        return False
    def extract_all_sections_for_parity(docx_path):
        # Re-serialize sections and body for comparison (mirrors
        # extract_all_sections + extract_blocks on the document body).
        doc = Document(docx_path)
        sections = []
        for idx, section in enumerate(doc.sections):
            sec = {}
            for htype, attr in [("header", "header"), ("first_page_header", "first_page_header"), ("even_page_header", "even_page_header"),
                                ("footer", "footer"), ("first_page_footer", "first_page_footer"), ("even_page_footer", "even_page_footer")]:
                part = getattr(section, attr, None)
                if part:
                    sec[htype] = extract_blocks(part, os.path.dirname(docx_path), f"sec{idx}_{htype}")
            sections.append(sec)
        body = extract_blocks(doc, os.path.dirname(docx_path), os.path.splitext(os.path.basename(docx_path))[0])
        return {"sections": sections, "body": body}
    orig = extract_all_sections_for_parity(docx_path)
    roundtrip = extract_all_sections_for_parity(docx_out)
    import difflib, pprint
    def blocks_to_str(blocks):
        # Stable pretty-printed form used for line-based diffing.
        return pprint.pformat(blocks, width=120)
    if orig == roundtrip:
        print("[Parity Check] PASS: Round-trip blocks are identical!")
        return True
    else:
        print("[Parity Check] FAIL: Differences found.")
        # Compare per section
        for idx, (orig_sec, round_sec) in enumerate(zip(orig["sections"], roundtrip["sections"])):
            if orig_sec != round_sec:
                print(f"Section {idx} header/footer mismatch:")
                diff = difflib.unified_diff(blocks_to_str(orig_sec).splitlines(), blocks_to_str(round_sec).splitlines(), fromfile='original', tofile='roundtrip', lineterm='')
                print('\n'.join(diff))
        if orig["body"] != roundtrip["body"]:
            print("Body mismatch:")
            diff = difflib.unified_diff(blocks_to_str(orig["body"]).splitlines(), blocks_to_str(roundtrip["body"]).splitlines(), fromfile='original', tofile='roundtrip', lineterm='')
            print('\n'.join(diff))
        return False
# Gradio UI: upload -> JSON preview on the left, converted output on the right.
with gr.Blocks(css="footer {visibility: hidden}") as demo:
    gr.Markdown("# Document Format Converter\nUpload a document and preview as JSON. Select a format to download in another format.")
    with gr.Row():
        with gr.Column():
            input_file = gr.File(label="Upload Document", file_types=[f'.{ext.lower()}' for ext in input_supported_formats])
            input_preview = gr.HTML(label="JSON Preview")
        with gr.Column():
            output_format = gr.Dropdown(label="Download As...", choices=output_supported_formats, value="DOCX")
            format_label = gr.Markdown("Previewing as: DOCX")
            output_preview = gr.HTML(label="Output Preview")
            output_file = gr.File(label="Download Converted Document", visible=True)
    # State carried between the upload and the format-change handlers.
    json_state = gr.State()
    orig_file_state = gr.State()

    def upload_and_preview(doc_file):
        """Serialize the uploaded file to JSON and show it in the left preview.

        Returns (preview_html, json_content, original_path) for the outputs
        wired below. NOTE(review): doc_file.name assumes Gradio always passes
        an object with a .name attribute — a plain string would raise here.
        """
        _, _, json_path = convert_document(doc_file, "json")
        # Handle conversion failure
        if not json_path or not os.path.exists(json_path):
            error_msg = "Error converting document to JSON."
            return f"<pre style='max-height:300px;overflow:auto'>{error_msg}</pre>", "", doc_file.name
        # Read and preview JSON content
        try:
            with open(json_path, "r", encoding="utf-8") as f:
                json_content = f.read()
        except Exception as e:
            error_msg = f"Error reading JSON: {e}"
            return f"<pre style='max-height:300px;overflow:auto'>{error_msg}</pre>", "", doc_file.name
        # Only the first 4000 characters are shown in the preview pane.
        preview_html = f"<pre style='max-height:300px;overflow:auto'>{json_content[:4000]}</pre>"
        return preview_html, json_content, doc_file.name

    def convert_and_preview(orig_file_path, output_format):
        """Convert the stored original file to the newly selected format."""
        class F:
            # Minimal stand-in for a Gradio upload object (only .name is read).
            name = orig_file_path
        _, _, out_path = convert_document(F(), output_format.lower())
        preview = get_preview(out_path)
        return f"Previewing as: {output_format}", preview, out_path

    input_file.upload(upload_and_preview, inputs=input_file, outputs=[input_preview, json_state, orig_file_state])
    output_format.change(convert_and_preview, inputs=[orig_file_state, output_format], outputs=[format_label, output_preview, output_file])
if __name__ == "__main__":
    # CLI mode: `python app.py --parity-check file.docx` runs the round-trip
    # test and exits without starting any server.
    if len(sys.argv) == 3 and sys.argv[1] == "--parity-check":
        parity_check(sys.argv[2])
        sys.exit(0)

    # Generate a random API key if one doesn't exist in environment variables.
    API_KEY = os.environ.get('API_KEY', secrets.token_urlsafe(32))
    print(f"API Key: {API_KEY}")  # Printed once at startup so the operator can copy it.

    # Flask app serving the JSON <-> DOCX REST endpoints alongside the Gradio UI.
    app = Flask(__name__)

    def check_api_key():
        """Check if the API key is valid."""
        provided_key = request.headers.get('X-API-Key')
        return bool(provided_key) and provided_key == API_KEY

    @app.route('/api/docx-to-json', methods=['POST'])
    def api_docx_to_json():
        """Accept a DOCX upload and return its JSON serialization."""
        if not check_api_key():
            return jsonify({"error": "Invalid or missing API key"}), 401
        if 'file' not in request.files:
            return jsonify({"error": "No file part"}), 400
        file = request.files['file']
        if file.filename == '':
            return jsonify({"error": "No selected file"}), 400
        if not file.filename.lower().endswith('.docx'):
            return jsonify({"error": "File must be a DOCX document"}), 400
        # Save the upload under a fresh temp dir. Fix: keep only the basename
        # of the client-supplied filename to prevent path traversal (e.g.
        # "../../evil.docx" escaping the temp directory).
        temp_dir = tempfile.mkdtemp()
        file_path = os.path.join(temp_dir, os.path.basename(file.filename))
        file.save(file_path)
        try:
            # Convert to JSON via the shared conversion pipeline.
            _, _, json_path = convert_document(type('obj', (object,), {'name': file_path}), "json")
            if not json_path or not os.path.exists(json_path):
                return jsonify({"error": "Error converting document to JSON"}), 500
            with open(json_path, "r", encoding="utf-8") as f:
                json_content = json.load(f)
            return jsonify(json_content)
        except Exception as e:
            return jsonify({"error": str(e)}), 500

    @app.route('/api/json-to-docx', methods=['POST'])
    def api_json_to_docx():
        """Accept a JSON document body and return the rendered DOCX file."""
        if not check_api_key():
            return jsonify({"error": "Invalid or missing API key"}), 401
        if not request.is_json:
            return jsonify({"error": "Request must be JSON"}), 400
        try:
            # Persist the JSON payload so convert_document can read it from disk.
            temp_dir = tempfile.mkdtemp()
            json_path = os.path.join(temp_dir, "document.json")
            with open(json_path, "w", encoding="utf-8") as f:
                json.dump(request.json, f)
            _, _, docx_path = convert_document(type('obj', (object,), {'name': json_path}), "docx")
            if not docx_path or not os.path.exists(docx_path):
                return jsonify({"error": "Error converting JSON to DOCX"}), 500
            return send_file(docx_path, as_attachment=True, download_name="converted.docx")
        except Exception as e:
            return jsonify({"error": str(e)}), 500

    # Run Flask in a daemon thread so Gradio can own the main thread.
    def run_flask():
        app.run(host='0.0.0.0', port=5000)

    flask_thread = threading.Thread(target=run_flask)
    flask_thread.daemon = True
    flask_thread.start()

    # Start the Gradio UI (blocking call).
    demo.launch(share=True)