"""Gradio front end that converts documents through ConvertAPI.

Two converters are exposed, one for uploaded local files and one for
documents referenced by URL. Converted files are written to ``TEMP_DIR``.
"""

import os
from pathlib import Path
from urllib.parse import urlparse

import convertapi
import gradio as gr
from dotenv import load_dotenv

load_dotenv()

# Fail fast at import time: nothing below can work without credentials.
convertapi.api_credentials = os.getenv("CONVERTAPI_TOKEN")
if not convertapi.api_credentials:
    raise ValueError("CONVERTAPI_TOKEN is required")

# Configuration constants
MAX_FILE_SIZE = int(os.getenv("MAX_FILE_SIZE", 10 * 1024 * 1024))  # 10MB default
TEMP_DIR = os.getenv("TEMP_DIR", "temp")


def get_supported_formats():
    """Return the output formats offered in the UI.

    The list is a static set of common formats; it is not queried from the
    ConvertAPI service.

    Returns:
        list[str]: Lower-case format names (file extensions without the dot).
    """
    return [
        "pdf",
        "docx",
        "doc",
        "txt",
        "rtf",
        "odt",
        "html",
        "epub",
        "png",
        "jpg",
        "jpeg",
        "gif",
        "bmp",
        "tiff",
        "webp",
        "svg",
        "xlsx",
        "xls",
        "csv",
        "ods",
        "pptx",
        "ppt",
        "odp",
        "mp4",
        "avi",
        "mov",
        "wmv",
        "flv",
        "webm",
        "mp3",
        "wav",
        "flac",
        "aac",
        "ogg",
    ]


SUPPORTED_FORMATS = get_supported_formats()


def _save_converted(result, input_filename: str, output_format: str) -> str:
    """Save a conversion result into TEMP_DIR and return the saved path."""
    output_path = Path(TEMP_DIR) / f"{input_filename}.{output_format}"
    # parents=True so a nested TEMP_DIR (e.g. "var/tmp/out") is created too.
    output_path.parent.mkdir(parents=True, exist_ok=True)
    result.file.save(str(output_path))
    return str(output_path)


def convert_local_file(local_file_path: str, output_format: str) -> str:
    """
    Convert a local document to a different format.

    Args:
        local_file_path (str): The path to the input file provided by Gradio.
        output_format (str): The format to convert the file to.

    Returns:
        str: The path to the converted file or an error message.
    """
    try:
        # Input validation: the path must point at an existing regular file.
        if not local_file_path or not os.path.isfile(local_file_path):
            return "Invalid or non-existent local file path."

        if output_format not in SUPPORTED_FORMATS:
            return f"Unsupported output format: {output_format}"

        # Reject oversized files before uploading anything.
        file_size = os.path.getsize(local_file_path)
        if file_size > MAX_FILE_SIZE:
            return f"File too large: {file_size / (1024 * 1024):.1f}MB (max {MAX_FILE_SIZE / (1024 * 1024):.0f}MB)"

        # Convert the local file
        result = convertapi.convert(output_format, {"File": local_file_path})

        # The output keeps the input's base name with the new extension.
        return _save_converted(result, Path(local_file_path).stem, output_format)

    except Exception as e:
        # Boundary handler: failures are reported as a message, not raised.
        return f"Error converting file: {str(e)}"


def convert_from_url(document_url: str, output_format: str) -> str:
    """
    Convert a document from a URL to a different format.

    Args:
        document_url (str): The URL of the input file.
        output_format (str): The format to convert the file to.

    Returns:
        str: The path to the converted file or an error message.
    """
    try:
        # Input validation: only http(s) URLs are accepted.
        if not document_url or not document_url.lower().startswith(
            ("http://", "https://")
        ):
            return "Invalid or unsupported URL format."

        if output_format not in SUPPORTED_FORMATS:
            return f"Unsupported output format: {output_format}"

        # Convert the file from a URL
        result = convertapi.convert(output_format, {"File": document_url})

        # Derive a base name from the URL path; fall back to a generic name
        # when the URL has no usable path component or cannot be parsed.
        try:
            input_filename = Path(urlparse(document_url).path).stem or "converted_file"
        except ValueError:
            input_filename = "converted_file"

        return _save_converted(result, input_filename, output_format)

    except Exception as e:
        # Boundary handler: failures are reported as a message, not raised.
        return f"Error converting file from URL: {str(e)}"


# Create Gradio interfaces
local_file_demo = gr.Interface(
    fn=convert_local_file,
    inputs=[
        gr.File(label="Upload File"),
        gr.Dropdown(
            choices=SUPPORTED_FORMATS,
            label="Output Format",
            value="pdf",
        ),
    ],
    outputs=gr.File(label="Download Converted File"),
    description="Convert documents from your local file system to different formats.",
)

url_demo = gr.Interface(
    fn=convert_from_url,
    inputs=[
        gr.Textbox(
            label="Document URL",
            placeholder="Enter the URL of the document to convert (e.g., https://example.com/document.pdf)",
            value="",
        ),
        gr.Dropdown(
            choices=SUPPORTED_FORMATS,
            label="Output Format",
            value="pdf",
        ),
    ],
    outputs=gr.File(label="Download Converted File"),
    description="Download and convert documents directly from URLs to different formats.",
)

# Create tabbed interface
demo = gr.TabbedInterface(
    [local_file_demo, url_demo],
    ["Local Files", "Online Files"],
    title="Universal Document Converter",
)

if __name__ == "__main__":
    # Launch locally (no public share link) with the MCP server and debug
    # mode enabled.
    demo.launch(share=False, mcp_server=True, debug=True)