File size: 6,302 Bytes
6d0a866
7006fce
 
6d0a866
 
7006fce
6d0a866
7006fce
 
6d0a866
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
7006fce
 
6d0a866
 
7006fce
6d0a866
 
 
 
7006fce
6d0a866
7006fce
6d0a866
 
 
 
 
7006fce
6d0a866
 
 
 
 
7006fce
6d0a866
 
7006fce
 
6d0a866
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
7006fce
6d0a866
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
7006fce
6d0a866
 
2d42e8d
6d0a866
 
 
 
7006fce
6d0a866
 
 
 
 
 
 
 
 
7006fce
6d0a866
 
 
 
 
7006fce
6d0a866
7006fce
6d0a866
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
import gradio as gr
import os
import tempfile
import logging
from summarizer_tool import AllInOneDispatcher # Import your dispatcher class

# Configure logging for the Gradio app
logging.basicConfig(level=logging.INFO, format='%(asctime)s - %(levelname)s - %(message)s')

# Initialize the dispatcher once, at import time, so that models are loaded a
# single time when the Gradio app starts up. This can take time, especially
# on CPU.
try:
    dispatcher = AllInOneDispatcher()
    logging.info("AllInOneDispatcher initialized successfully.")
except Exception as e:
    # Keep the module-level name bound even when start-up fails. Otherwise the
    # request handlers would later die with a NameError that hides the real
    # cause. logging.exception records the full traceback of the failure.
    dispatcher = None
    logging.exception(f"Failed to initialize AllInOneDispatcher: {e}")
    # You might want to display an error message in the Gradio app itself
    # or raise a more specific error for deployment.

# --- Gradio Interface Functions ---

# Function to handle Text Processing
def process_text_task(text_input: str, task_name: str, max_summary_len: int, min_summary_len: int, max_gen_tokens: int, num_gen_sequences: int, tts_lang: str):
    """Run a text-based task through the global dispatcher.

    Args:
        text_input: Text to process. ``None`` or whitespace-only input is
            rejected with a prompt message.
        task_name: Name of the task forwarded to ``dispatcher.process``.
        max_summary_len: Forwarded as ``max_length`` for "summarization".
        min_summary_len: Forwarded as ``min_length`` for "summarization".
        max_gen_tokens: Forwarded as ``max_new_tokens`` for "text-generation".
        num_gen_sequences: Forwarded as ``num_return_sequences`` for
            "text-generation".
        tts_lang: Forwarded as ``lang`` for "tts".

    Returns:
        A ``(message, audio_path)`` tuple. ``audio_path`` is only set when a
        "tts" run produced an existing file; otherwise it is ``None``.
        Errors raised while processing are reported in ``message`` rather
        than propagated.
    """
    # Guard against None as well as blank text so .strip() cannot raise.
    if not text_input or not text_input.strip():
        return "Please enter some text.", None  # Return None for audio output

    # Only forward the options that belong to the selected task.
    kwargs = {}
    if task_name == "summarization":
        kwargs["max_length"] = max_summary_len
        kwargs["min_length"] = min_summary_len
    elif task_name == "text-generation":
        kwargs["max_new_tokens"] = max_gen_tokens
        kwargs["num_return_sequences"] = num_gen_sequences
    elif task_name == "tts":
        kwargs["lang"] = tts_lang

    try:
        logging.info(f"Processing text with task: {task_name}")
        result = dispatcher.process(text_input, task=task_name, **kwargs)

        if task_name == "tts":
            # For TTS the dispatcher is expected to return a file path. Any
            # other value (e.g. None) is reported as a TTS failure instead of
            # surfacing as a TypeError from os.path.exists.
            if isinstance(result, (str, os.PathLike)) and os.path.exists(result):
                return "Speech generated successfully!", result
            return "TTS failed to generate audio.", None

        # For other text tasks, return the string representation of the result
        return str(result), None
    except Exception as e:
        # Top-level UI boundary: log with traceback and show the error text.
        logging.exception(f"Error processing text: {e}")
        return f"An error occurred: {e}", None

# Function to handle File Processing
def process_file_task(file_obj, task_name: str):
    """Run a file-based task through the global dispatcher.

    Args:
        file_obj: The uploaded file. Either a filesystem path (``str`` or
            ``os.PathLike``) or an object exposing the path as ``.name``.
            ``None`` means nothing was uploaded.
        task_name: Name of the task forwarded to ``dispatcher.process``.

    Returns:
        A string for display: the transcription text for
        "automatic-speech-recognition", a formatted summary for "video",
        ``str(result)`` for everything else, or an error message when
        processing fails.
    """
    if file_obj is None:
        return "Please upload a file."

    # The File input in this app is declared with type="filepath", which
    # delivers a plain path string; tempfile-like objects expose the path as
    # .name instead. Accept both so the handler does not crash on a string.
    if isinstance(file_obj, (str, os.PathLike)):
        file_path = os.fspath(file_obj)
    else:
        file_path = file_obj.name

    try:
        logging.info(f"Processing file '{file_path}' with task: {task_name}")
        result = dispatcher.process(file_path, task=task_name)

        # The dict-specific formatting below is only applied when the result
        # really is a dict; anything else falls through to str(result).
        if task_name == "automatic-speech-recognition" and isinstance(result, dict):
            return result.get('text', 'No transcription found.')
        if task_name == "video" and isinstance(result, dict):
            # Video analysis returns a dict with image and audio results
            return f"Video Analysis Result:\nImage Analysis: {result.get('image')}\nAudio Analysis: {result.get('audio')}"
        return str(result)  # Convert other results to string for display

    except NotImplementedError as e:
        logging.error(f"Task not implemented: {e}")
        return f"Task not fully implemented: {e}. Video processing is complex and requires system-level ffmpeg."
    except ValueError as e:
        logging.error(f"Value error processing file: {e}")
        return f"Error processing file: {e}. Ensure the file type matches the selected task."
    except Exception as e:
        # Top-level UI boundary: log with traceback and show the error text.
        logging.exception(f"An unexpected error occurred during file processing: {e}")
        return f"An unexpected error occurred: {e}"


# --- Gradio Interface Definition ---

# Text Processing Tab
# Input widgets, listed in the positional order of process_text_task's
# parameters: text, task, summary max/min, generation tokens/sequences, TTS
# language.
text_tab_inputs = [
    gr.Textbox(
        label="Enter Text",
        lines=8,
        placeholder="Type your text here for summarization, sentiment analysis, etc.",
    ),
    gr.Dropdown(
        choices=["sentiment-analysis", "summarization", "text-generation", "tts", "translation_en_to_fr"],
        value="sentiment-analysis",
        label="Select Text Task",
    ),
    gr.Slider(
        label="Max Summary Length (for Summarization)",
        minimum=10,
        maximum=200,
        step=1,
        value=50,
    ),
    gr.Slider(
        label="Min Summary Length (for Summarization)",
        minimum=5,
        maximum=100,
        step=1,
        value=10,
    ),
    gr.Slider(
        label="Max Generated Tokens (for Text Generation)",
        minimum=10,
        maximum=200,
        step=1,
        value=50,
    ),
    gr.Slider(
        label="Number of Sequences (for Text Generation)",
        minimum=1,
        maximum=3,
        step=1,
        value=1,
    ),
    gr.Dropdown(
        choices=["en", "fr", "es"],
        value="en",
        label="TTS Language",
    ),
]

# Output widgets matching the (message, audio path) pair the handler returns.
text_tab_outputs = [
    gr.Textbox(label="Analysis Result / Generated Text"),
    gr.Audio(type="filepath", label="Generated Speech (for TTS)"),
]

# Wire the handler and its widgets together into the text tab.
text_interface = gr.Interface(
    title="📝 Text Processing",
    description="Perform various NLP tasks like sentiment analysis, summarization, text generation, and text-to-speech.",
    fn=process_text_task,
    inputs=text_tab_inputs,
    outputs=text_tab_outputs,
)

# File Processing Tab
# Input widgets, listed in the positional order of process_file_task's
# parameters: the uploaded file, then the task to run on it.
file_tab_inputs = [
    gr.File(
        type="filepath",
        file_types=[".pdf", ".mp3", ".wav", ".jpg", ".jpeg", ".png", ".mov", ".mp4"],
        label="Upload File",
    ),
    gr.Dropdown(
        choices=["image-classification", "object-detection", "automatic-speech-recognition", "pdf", "video"],
        value="image-classification",
        label="Select File Task",
    ),
]

# A single textbox shows whatever string the handler returns.
file_tab_outputs = gr.Textbox(label="File Processing Result")

# Wire the handler and its widgets together into the file tab.
file_interface = gr.Interface(
    title="📁 File Processing",
    description="Upload an image, audio, PDF, or video file for AI analysis.",
    fn=process_file_task,
    inputs=file_tab_inputs,
    outputs=file_tab_outputs,
)

# Combine interfaces into a Tabbed Interface
# One tab per interface; tab_names pairs up with interface_list by position.
demo = gr.TabbedInterface(
    interface_list=[text_interface, file_interface],
    tab_names=["Text Analyzer", "File Analyzer"],
)

# --- Launch the Gradio App ---
if __name__ == "__main__":
    # A plain demo.launch() is enough for local testing. On Hugging Face
    # Spaces, make sure `gradio`, `requests` and the other dependencies of
    # summarizer_tool.py are listed in requirements.txt.
    launch_options = {
        "share": True,  # creates a temporary public link for easy sharing
    }
    demo.launch(**launch_options)