import logging
import os
import tempfile

import gradio as gr

from summarizer_tool import AllInOneDispatcher  # Import your dispatcher class

# Configure logging for the Gradio app
logging.basicConfig(level=logging.INFO, format='%(asctime)s - %(levelname)s - %(message)s')

# Initialize the dispatcher globally.
# This ensures models are loaded only once when the Gradio app starts up.
# This can take time, especially on CPU.
try:
    dispatcher = AllInOneDispatcher()
    logging.info("AllInOneDispatcher initialized successfully.")
except Exception as e:
    # Keep the traceback in the log and bind the name to None so request
    # handlers can report the failure instead of hitting a NameError.
    logging.exception("Failed to initialize AllInOneDispatcher: %s", e)
    dispatcher = None


# --- Gradio Interface Functions ---

# Function to handle Text Processing
def process_text_task(text_input: str, task_name: str, max_summary_len: int, min_summary_len: int,
                      max_gen_tokens: int, num_gen_sequences: int, tts_lang: str):
    """Handle the text-based AI tasks exposed by the text tab.

    Args:
        text_input: Text to process. Empty, whitespace-only or missing input
            is rejected with a prompt message.
        task_name: Task identifier forwarded to ``dispatcher.process``.
        max_summary_len: Forwarded as ``max_length`` for "summarization".
        min_summary_len: Forwarded as ``min_length`` for "summarization".
        max_gen_tokens: Forwarded as ``max_new_tokens`` for "text-generation".
        num_gen_sequences: Forwarded as ``num_return_sequences`` for
            "text-generation".
        tts_lang: Forwarded as ``lang`` for "tts".

    Returns:
        A ``(message, audio_path)`` pair matching the two output components.
        ``audio_path`` is ``None`` unless the "tts" task produced a file that
        exists on disk. Errors are reported in ``message`` rather than raised.
    """
    if not text_input or not text_input.strip():
        return "Please enter some text.", None  # Return None for audio output

    if dispatcher is None:
        return "The AI backend failed to initialize. Check the server logs.", None

    try:
        kwargs = {}
        if task_name == "summarization":
            # Slider values may arrive as floats; the model expects integers.
            max_len = int(max_summary_len)
            min_len = int(min_summary_len)
            # The two sliders are independent, so the range can be inverted.
            if min_len > max_len:
                return "Min summary length cannot exceed max summary length.", None
            kwargs["max_length"] = max_len
            kwargs["min_length"] = min_len
        elif task_name == "text-generation":
            kwargs["max_new_tokens"] = int(max_gen_tokens)
            kwargs["num_return_sequences"] = int(num_gen_sequences)
        elif task_name == "tts":
            kwargs["lang"] = tts_lang

        logging.info("Processing text with task: %s", task_name)
        result = dispatcher.process(text_input, task=task_name, **kwargs)

        if task_name == "tts":
            # For TTS, dispatcher.process returns a file path. Guard against a
            # non-path result before touching the filesystem.
            if isinstance(result, (str, os.PathLike)) and os.path.exists(result):
                return "Speech generated successfully!", result
            return "TTS failed to generate audio.", None

        # For other text tasks, return the string representation of the result
        return str(result), None
    except Exception as e:
        logging.exception("Error processing text: %s", e)
        return f"An error occurred: {e}", None
# Function to handle File Processing
def process_file_task(file_obj, task_name: str):
    """Handle image, audio, PDF, and limited video processing.

    Args:
        file_obj: The uploaded file as delivered by the ``gr.File`` input.
            With ``type="filepath"`` this is a plain path string; a file-like
            object exposing ``.name`` is accepted as well. ``None`` means
            nothing was uploaded.
        task_name: Task identifier forwarded to ``dispatcher.process``.

    Returns:
        A string for the result textbox. Errors are reported in the returned
        text rather than raised.
    """
    if file_obj is None:
        return "Please upload a file."

    # gr.File(type="filepath") passes a path string, which has no `.name`;
    # fall back to `.name` only for tempfile-like objects.
    if isinstance(file_obj, (str, os.PathLike)):
        file_path = os.fspath(file_obj)
    else:
        file_path = file_obj.name

    # Module-level initialization may have failed, leaving `dispatcher`
    # unset or None; report that clearly instead of a generic error.
    active_dispatcher = globals().get("dispatcher")
    if active_dispatcher is None:
        return "The AI backend failed to initialize. Check the server logs."

    try:
        logging.info("Processing file '%s' with task: %s", file_path, task_name)
        result = active_dispatcher.process(file_path, task=task_name)

        # Handle specific output types if needed (e.g., image overlay for object detection)
        # For now, we'll return string representation.
        if task_name == "automatic-speech-recognition" and isinstance(result, dict):
            return result.get('text', 'No transcription found.')
        if task_name == "video" and isinstance(result, dict):
            # Video analysis returns a dict with image and audio results
            return f"Video Analysis Result:\nImage Analysis: {result.get('image')}\nAudio Analysis: {result.get('audio')}"
        return str(result)  # Convert other results to string for display
    except NotImplementedError as e:
        logging.error("Task not implemented: %s", e)
        return f"Task not fully implemented: {e}. Video processing is complex and requires system-level ffmpeg."
    except ValueError as e:
        logging.error("Value error processing file: %s", e)
        return f"Error processing file: {e}. Ensure the file type matches the selected task."
    except Exception as e:
        logging.exception("An unexpected error occurred during file processing: %s", e)
        return f"An unexpected error occurred: {e}"


# --- Gradio Interface Definition ---

# Text Processing Tab
text_tab_inputs = [
    gr.Textbox(lines=8, label="Enter Text",
               placeholder="Type your text here for summarization, sentiment analysis, etc."),
    gr.Dropdown(
        ["sentiment-analysis", "summarization", "text-generation", "tts", "translation_en_to_fr"],
        label="Select Text Task",
        value="sentiment-analysis"
    ),
    gr.Slider(minimum=10, maximum=200, value=50, step=1, label="Max Summary Length (for Summarization)"),
    gr.Slider(minimum=5, maximum=100, value=10, step=1, label="Min Summary Length (for Summarization)"),
    gr.Slider(minimum=10, maximum=200, value=50, step=1, label="Max Generated Tokens (for Text Generation)"),
    gr.Slider(minimum=1, maximum=3, value=1, step=1, label="Number of Sequences (for Text Generation)"),
    gr.Dropdown(["en", "fr", "es"], label="TTS Language", value="en")
]
text_tab_outputs = [
    gr.Textbox(label="Analysis Result / Generated Text"),
    gr.Audio(label="Generated Speech (for TTS)", type="filepath")
]
text_interface = gr.Interface(
    fn=process_text_task,
    inputs=text_tab_inputs,
    outputs=text_tab_outputs,
    title="📝 Text Processing",
    description="Perform various NLP tasks like sentiment analysis, summarization, text generation, and text-to-speech."
)

# File Processing Tab
file_tab_inputs = [
    gr.File(label="Upload File", type="filepath",
            file_types=[".pdf", ".mp3", ".wav", ".jpg", ".jpeg", ".png", ".mov", ".mp4"]),
    gr.Dropdown(
        ["image-classification", "object-detection", "automatic-speech-recognition", "pdf", "video"],
        label="Select File Task",
        value="image-classification"
    )
]
file_tab_outputs = gr.Textbox(label="File Processing Result")
file_interface = gr.Interface(
    fn=process_file_task,
    inputs=file_tab_inputs,
    outputs=file_tab_outputs,
    title="📁 File Processing",
    description="Upload an image, audio, PDF, or video file for AI analysis."
)

# Combine interfaces into a Tabbed Interface
demo = gr.TabbedInterface(
    [text_interface, file_interface],
    ["Text Analyzer", "File Analyzer"]
)

# --- Launch the Gradio App ---
if __name__ == "__main__":
    # For local testing, use demo.launch()
    # For Hugging Face Spaces, ensure `gradio`, `requests` and other dependencies
    # from summarizer_tool.py are in requirements.txt
    demo.launch(share=True)  # share=True creates a public link for easy sharing (temporary)