File size: 20,295 Bytes
b2b4dfa
 
d4af98c
4403ebb
b2b4dfa
 
 
 
4403ebb
 
 
d4af98c
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
b2b4dfa
 
4403ebb
b2b4dfa
 
e1eb15d
 
 
 
 
 
 
 
 
d4af98c
 
 
 
4403ebb
b2b4dfa
 
d4af98c
b2b4dfa
 
 
 
 
e1eb15d
b2b4dfa
e1eb15d
b2b4dfa
 
d4af98c
b2b4dfa
 
 
 
 
 
 
 
 
 
e1eb15d
 
 
 
 
 
 
 
b2b4dfa
e1eb15d
b2b4dfa
 
d4af98c
b2b4dfa
d4af98c
b2b4dfa
 
 
d4af98c
b2b4dfa
 
d4af98c
 
 
 
 
 
 
 
 
 
 
 
 
 
b2b4dfa
 
 
 
 
d4af98c
b2b4dfa
 
d4af98c
 
4403ebb
d4af98c
 
 
4403ebb
d4af98c
 
 
 
4403ebb
b2b4dfa
 
 
4403ebb
d4af98c
 
 
 
4403ebb
d4af98c
4403ebb
 
 
d4af98c
 
 
4403ebb
d4af98c
4403ebb
d4af98c
 
4403ebb
d4af98c
 
 
 
4403ebb
d4af98c
 
 
 
e1eb15d
d4af98c
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
b2b4dfa
 
 
4403ebb
b2b4dfa
 
4403ebb
b2b4dfa
d4af98c
 
 
 
 
 
 
 
 
 
 
 
 
b2b4dfa
d4af98c
 
 
 
b2b4dfa
 
d4af98c
 
 
b2b4dfa
 
d4af98c
b2b4dfa
 
 
d4af98c
b2b4dfa
 
 
 
 
d4af98c
b2b4dfa
d4af98c
b2b4dfa
 
d4af98c
b2b4dfa
 
d4af98c
b2b4dfa
 
d4af98c
 
b2b4dfa
 
d4af98c
b2b4dfa
 
d4af98c
b2b4dfa
 
 
 
 
 
 
d4af98c
b2b4dfa
 
 
d4af98c
b2b4dfa
d4af98c
b2b4dfa
d4af98c
b2b4dfa
d4af98c
b2b4dfa
 
 
d4af98c
 
 
 
 
 
 
 
 
 
 
 
 
 
b2b4dfa
d4af98c
 
 
 
 
 
 
 
 
4403ebb
 
 
 
d4af98c
 
 
4403ebb
d4af98c
 
 
 
 
e1eb15d
b2b4dfa
d4af98c
 
 
 
 
 
 
 
 
 
 
 
b2b4dfa
 
 
d4af98c
 
b2b4dfa
 
 
d4af98c
 
 
 
 
 
 
 
 
e1eb15d
d4af98c
 
 
 
 
 
 
 
 
 
 
 
e1eb15d
b2b4dfa
e1eb15d
 
b2b4dfa
d4af98c
b2b4dfa
 
 
 
 
 
 
 
 
 
 
4403ebb
b2b4dfa
e1eb15d
 
b2b4dfa
4403ebb
b2b4dfa
4403ebb
 
 
b2b4dfa
4403ebb
b2b4dfa
 
4403ebb
b2b4dfa
4403ebb
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
import os
import gradio as gr
import re
import logging # Added for debugging
from src.core.transformer import TranscriptTransformer
from src.utils.pdf_processor import PDFProcessor
from src.utils.text_processor import TextProcessor

# Root-logger configuration: INFO level, timestamp + level + message on each line.
_LOG_FORMAT = '%(asctime)s - %(levelname)s - %(message)s'
logging.basicConfig(format=_LOG_FORMAT, level=logging.INFO)

# English UI strings. Both language tables must expose the same keys, because
# the UI code looks strings up by key for whichever language is selected.
_UI_TEXT_EN = {
    "title": "AI Script Generator",
    "subtitle": "Transform transcripts and PDFs into timed, structured teaching scripts using AI",
    "input_type_label": "Input Type",
    # Index 0 is the PDF option, index 1 the raw-text option (callers rely on the order).
    "input_type_options": ["PDF", "Raw Text"],
    "upload_pdf_label": "Upload Transcript (PDF)",
    "paste_text_label": "Paste Transcript Text",
    "paste_text_placeholder": "Paste your transcript text here...",
    "guiding_prompt_label": "Guiding Prompt (Optional)",
    "guiding_prompt_placeholder": "Additional instructions to customize the output. Examples: 'Use a more informal tone', 'Focus only on section X', 'Generate the content in Spanish', 'Include more practical programming examples', etc.",
    "guiding_prompt_info": "The Guiding Prompt allows you to provide specific instructions to modify the generated content, like output/desired LANGUAGE. You can use it to change the tone, style, focus ONLY on specific sections of the text, specify the output language (e.g., 'Generate in Spanish/French/German'), or give any other instruction that helps personalize the final result.",
    "duration_label": "Target Lecture Duration (minutes)",
    "examples_label": "Include Practical Examples",
    "submit_button": "Transform Transcript",
    "output_label": "Generated Teaching Transcript",
    "error_no_pdf": "Error: No PDF file uploaded",
    "error_no_text": "Error: No text provided",
    "error_prefix": "Error processing transcript: ",
    "language_selector": "Language / Idioma",
    "show_timestamps": "Show Timestamps",
    "hide_timestamps": "Hide Timestamps",
}

# Spanish UI strings (same keys as the English table).
_UI_TEXT_ES = {
    "title": "Generador de Guiones IA",
    "subtitle": "Transforma transcripciones y PDFs en guiones de enseñanza estructurados y cronometrados usando IA",
    "input_type_label": "Tipo de Entrada",
    # Index 0 is the PDF option, index 1 the raw-text option (callers rely on the order).
    "input_type_options": ["PDF", "Texto"],
    "upload_pdf_label": "Subir Transcripción (PDF)",
    "paste_text_label": "Pegar Texto de Transcripción",
    "paste_text_placeholder": "Pega tu texto de transcripción aquí...",
    "guiding_prompt_label": "Instrucciones Guía (Opcional)",
    "guiding_prompt_placeholder": "Instrucciones adicionales para personalizar el resultado. Ejemplos: 'Usa un tono más informal', 'Enfócate solo en la sección X', 'Genera el contenido en inglés', 'Incluye más ejemplos prácticos de programación', etc.",
    "guiding_prompt_info": "Las Instrucciones Guía te permiten proporcionar indicaciones específicas para modificar el contenido generado, como el IDIOMA deseado. Puedes usarlas para cambiar el tono, estilo, enfocarte SOLO en secciones específicas del texto, especificar el idioma de salida (ej., 'Generar en inglés/francés/alemán'), o dar cualquier otra instrucción que ayude a personalizar el resultado final.",
    "duration_label": "Duración Objetivo de la Clase (minutos)",
    "examples_label": "Incluir Ejemplos Prácticos",
    "submit_button": "Transformar Transcripción",
    "output_label": "Guión de Enseñanza Generado",
    "error_no_pdf": "Error: No se ha subido ningún archivo PDF",
    "error_no_text": "Error: No se ha proporcionado texto",
    "error_prefix": "Error al procesar la transcripción: ",
    "language_selector": "Language / Idioma",
    "show_timestamps": "Mostrar Marcas de Tiempo",
    "hide_timestamps": "Ocultar Marcas de Tiempo",
}

# UI strings keyed by language code, then by UI element name.
TRANSLATIONS = {
    "en": _UI_TEXT_EN,
    "es": _UI_TEXT_ES,
}

# Instruction appended to the guiding prompt for each UI language.
# An empty string means "append nothing" (English is the default output language).
LANGUAGE_PROMPTS = {
    "en": "",
    "es": "Generate the content in Spanish. Genera todo el contenido en español.",
}

class TranscriptTransformerApp:
    """Gradio application that turns a transcript (PDF or pasted text) into a teaching script.

    The instance owns the PDF/text processors and the transformer, and caches
    the most recent generation in two forms (with and without timestamps) so
    the timestamps checkbox can switch between them without regenerating.
    The caches live on the instance, so every callback registered by
    ``launch`` reads and writes the same values.
    """

    # Matches [MM:SS] or [HH:MM:SS] markers (one or two digits in the first field).
    _TIMESTAMP_PATTERN = re.compile(r'\[\d{1,2}:\d{2}(:\d{2})?\]')

    # If any of these substrings occurs in the guiding prompt, the user is
    # considered to have chosen an output language explicitly.
    _LANGUAGE_KEYWORDS = (
        "spanish", "español", "english", "inglés",
        "french", "francés", "german", "alemán",
    )

    def __init__(self):
        """Create the processors and the transformer and reset the cached output."""
        logging.info("Initializing TranscriptTransformerApp...")
        self.pdf_processor = PDFProcessor()
        self.text_processor = TextProcessor()
        try:
            self.transformer = TranscriptTransformer()
        except ValueError as e:
            # Per the original author's note this is expected when the
            # GEMINI_API_KEY is missing. Keep the app usable and report the
            # problem from process_transcript instead of crashing here.
            logging.error(f"Failed to initialize Transformer: {e}")
            self.transformer = None
        self.current_language = "en"  # Language code of the UI, default English
        self.last_generated_content = ""  # Most recent output as first shown to the user
        self.content_with_timestamps = ""  # Most recent output, timestamps kept
        self.content_without_timestamps = ""  # Most recent output, timestamps stripped
        logging.info("TranscriptTransformerApp initialized.")

    def _build_prompt(self, initial_prompt, language):
        """Return the guiding prompt, adding the UI-language instruction when appropriate.

        The instruction from LANGUAGE_PROMPTS is appended only when the user
        has not mentioned a language themselves and the selected UI language
        has a non-empty instruction.
        """
        prompt = initial_prompt or ""
        lowered = prompt.lower()
        if any(keyword in lowered for keyword in self._LANGUAGE_KEYWORDS):
            return prompt
        suffix = LANGUAGE_PROMPTS.get(language)
        if not suffix:
            return prompt
        return f"{prompt} {suffix}" if prompt else suffix

    def process_transcript(self,
                           language: str,
                           input_type: str,
                           file_obj: object = None,
                           raw_text_input: str = "",
                           initial_prompt: str = "",
                           target_duration: int = 30,
                           include_examples: bool = True) -> str:
        """
        Transform an uploaded or pasted transcript into a teaching transcript.

        Args:
            language: Selected UI language code; unknown codes fall back to "en"
            input_type: Selected input option label (PDF or raw text) in the UI language
            file_obj: Uploaded file value; only its ``.name`` (the path) is read
            raw_text_input: Raw text input (used when input_type is not PDF)
            initial_prompt: Additional guiding instructions for the content generation
            target_duration: Target lecture duration in minutes
            include_examples: Whether to include practical examples

        Returns:
            str: The generated teaching transcript (with timestamps), or a
            localized error message. Exceptions raised while processing are
            logged and converted into an error message rather than propagated.
        """
        logging.info(
            "Processing transcript. Lang: %s, Type: %s, HasFile: %s, HasText: %s, Duration: %s, Examples: %s",
            language, input_type, file_obj is not None, bool(raw_text_input),
            target_duration, include_examples,
        )

        # Fall back to English so that building an error message can never
        # itself fail with KeyError on an unexpected language code.
        translations = TRANSLATIONS.get(language, TRANSLATIONS["en"])

        if self.transformer is None:
            logging.error("Transformer not initialized, likely missing API key.")
            return translations.get("error_prefix", "Error: ") + "Configuration error (API Key missing?)"

        try:
            # The first input option is the PDF one in every language table.
            if input_type == translations["input_type_options"][0]:
                if file_obj is None:
                    return translations["error_no_pdf"]
                raw_text = self.pdf_processor.extract_text(file_obj.name)
            else:
                # Treat a missing value like empty text instead of letting
                # None.strip() surface as an opaque AttributeError message.
                if not (raw_text_input or "").strip():
                    return translations["error_no_text"]
                raw_text = raw_text_input

            lecture_transcript = self.transformer.transform_to_lecture(
                text=raw_text,
                target_duration=target_duration,
                include_examples=include_examples,
                initial_prompt=self._build_prompt(initial_prompt, language),
            )

            # Cache both variants so toggle_timestamps can switch instantly.
            self.content_with_timestamps = lecture_transcript
            logging.info("Generated content stored (with timestamps).")
            self.content_without_timestamps = self.remove_timestamps(lecture_transcript)
            logging.info("Generated content stored (without timestamps).")

            # The freshly generated text is shown with timestamps.
            self.last_generated_content = lecture_transcript

            logging.info("Transcript processing successful.")
            return lecture_transcript

        except Exception as e:
            # UI boundary: log the traceback and show the message to the user.
            logging.error(f"Error processing transcript: {e}", exc_info=True)
            return f"{translations['error_prefix']}{str(e)}"

    def remove_timestamps(self, text):
        """Return *text* with every [MM:SS] / [HH:MM:SS] marker removed.

        Only the bracketed marker is deleted; surrounding whitespace is kept.
        """
        logging.info("Removing timestamps...")
        result = self._TIMESTAMP_PATTERN.sub('', text)
        logging.info("Timestamps removed.")
        return result

    def toggle_timestamps(self, show_timestamps):
        """Return the cached output with or without timestamps.

        Args:
            show_timestamps: Truthy to get the version that keeps timestamps.

        Returns:
            The cached text of the most recent successful generation
            (an empty string before the first one).
        """
        logging.info(f"Toggling timestamps visibility. Show: {show_timestamps}")
        if show_timestamps:
            logging.info("Returning content WITH timestamps.")
            return self.content_with_timestamps
        logging.info("Returning content WITHOUT timestamps.")
        return self.content_without_timestamps

    def update_ui_language(self, language):
        """Record the selected language and return its UI strings as a flat list.

        Note: ``launch`` wires the language dropdown to its own nested handler,
        not to this method; the list returned here (14 items) does not match
        that handler's output list.

        Raises:
            KeyError: If *language* has no entry in TRANSLATIONS.
        """
        logging.info(f"Updating UI language to: {language}")
        self.current_language = language

        translations = TRANSLATIONS[language]

        logging.info("UI language updated.")
        return [
            translations["title"],
            translations["subtitle"],
            translations["input_type_label"],
            gr.update(choices=translations["input_type_options"], value=translations["input_type_options"][0]),
            translations["upload_pdf_label"],
            translations["paste_text_label"],
            translations["paste_text_placeholder"],
            translations["guiding_prompt_label"],
            translations["guiding_prompt_placeholder"],
            translations["guiding_prompt_info"],
            translations["duration_label"],
            translations["examples_label"],
            translations["submit_button"],
            translations["output_label"]
        ]

    def launch(self):
        """Build the Gradio Blocks UI, wire its events, and start the server."""
        logging.info("Configuring Gradio interface...")
        # Example PDF lives in <parent of this file's directory>/data/sample2.pdf
        example_pdf = os.path.join(os.path.dirname(os.path.dirname(__file__)), "data", "sample2.pdf")
        logging.info(f"Example PDF path: {example_pdf}")

        # The UI is first rendered in English; the dropdown retranslates it.
        en = TRANSLATIONS["en"]

        # Map language dropdown values to language codes
        lang_map = {
            "🇺🇸 English": "en",
            "🇪🇸 Español": "es"
        }

        def get_language_code(language_display):
            """Translate a dropdown display value into a language code ("en" if unknown)."""
            logging.info(f"Getting language code for display value: {language_display}")
            code = lang_map.get(language_display, "en")
            logging.info(f"Language code: {code}")
            return code

        def update_input_visibility(language_display, choice):
            """Show the PDF column or the text column depending on the selected input type."""
            options = TRANSLATIONS[lang_map.get(language_display, "en")]["input_type_options"]
            return [
                gr.update(visible=(choice == options[0])),  # pdf_column
                gr.update(visible=(choice == options[1]))  # text_column
            ]

        def update_ui_with_display(language_display):
            """Return one update per component in the language-change output list."""
            logging.info(f"Update UI triggered for language: {language_display}")
            language = get_language_code(language_display)
            self.current_language = language

            translations = TRANSLATIONS[language]

            logging.info("UI elements update values prepared.")
            # Order must match the `outputs` list of language_selector.change below.
            return [
                "# " + translations["title"],  # title_md (markdown heading)
                translations["subtitle"],  # subtitle_md
                gr.update(  # input_type: new choices, reset to the PDF option
                    choices=translations["input_type_options"],
                    value=translations["input_type_options"][0],
                    label=translations["input_type_label"],
                ),
                gr.update(label=translations["upload_pdf_label"]),  # file_input
                gr.update(  # text_input
                    label=translations["paste_text_label"],
                    placeholder=translations["paste_text_placeholder"],
                ),
                gr.update(  # initial_prompt
                    label=translations["guiding_prompt_label"],
                    placeholder=translations["guiding_prompt_placeholder"],
                    info=translations["guiding_prompt_info"],
                ),
                gr.update(label=translations["duration_label"]),  # target_duration
                gr.update(label=translations["examples_label"]),  # include_examples
                translations["submit_button"],  # submit_btn text
                gr.update(label=translations["output_label"]),  # output
                gr.update(label=translations["show_timestamps"])  # timestamps_checkbox
            ]

        def run_transformation(lang_display, input_type_val, file_obj_val, text_input_val,
                               initial_prompt_val, target_duration_val, include_examples_val):
            """Resolve the language code, then delegate to process_transcript."""
            return self.process_transcript(
                get_language_code(lang_display), input_type_val, file_obj_val, text_input_val,
                initial_prompt_val, target_duration_val, include_examples_val,
            )

        with gr.Blocks(title=en["title"]) as interface:
            # Header with title and language selector side by side
            with gr.Row():
                with gr.Column(scale=4):
                    title_md = gr.Markdown("# " + en["title"])
                with gr.Column(scale=1):
                    language_selector = gr.Dropdown(
                        choices=["🇺🇸 English", "🇪🇸 Español"],
                        value="🇺🇸 English",
                        label=en["language_selector"],
                        elem_id="language-selector",
                        interactive=True
                    )

            # Subtitle
            subtitle_md = gr.Markdown(en["subtitle"])

            # Input type row
            with gr.Row():
                input_type = gr.Radio(
                    choices=en["input_type_options"],
                    label=en["input_type_label"],
                    value=en["input_type_options"][0]
                )

            # File/text input columns; only one is visible at a time
            with gr.Row():
                with gr.Column(visible=True) as pdf_column:
                    file_input = gr.File(
                        label=en["upload_pdf_label"],
                        file_types=[".pdf"]
                    )

                with gr.Column(visible=False) as text_column:
                    text_input = gr.Textbox(
                        label=en["paste_text_label"],
                        lines=10,
                        placeholder=en["paste_text_placeholder"]
                    )

            # Guiding prompt
            with gr.Row():
                initial_prompt = gr.Textbox(
                    label=en["guiding_prompt_label"],
                    lines=3,
                    value="",
                    placeholder=en["guiding_prompt_placeholder"],
                    info=en["guiding_prompt_info"]
                )

            # Settings row
            with gr.Row():
                target_duration = gr.Number(
                    label=en["duration_label"],
                    value=30,
                    minimum=2,
                    maximum=60,
                    step=1
                )

                include_examples = gr.Checkbox(
                    label=en["examples_label"],
                    value=True
                )

            # Submit button
            with gr.Row():
                submit_btn = gr.Button(en["submit_button"])

            # Output area
            output = gr.Textbox(
                label=en["output_label"],
                lines=25
            )

            # Timestamps toggle
            with gr.Row():
                timestamps_checkbox = gr.Checkbox(
                    label=en["show_timestamps"],
                    value=True,
                    interactive=True
                )

            # Switch between the PDF and text columns when the input type changes
            input_type.change(
                fn=update_input_visibility,
                inputs=[language_selector, input_type],
                outputs=[pdf_column, text_column]
            )

            # Language change event. Each component is listed exactly once and
            # in the same order as the list returned by update_ui_with_display.
            language_selector.change(
                fn=update_ui_with_display,
                inputs=language_selector,
                outputs=[
                    title_md, subtitle_md,
                    input_type,
                    file_input, text_input,
                    initial_prompt,
                    target_duration, include_examples,
                    submit_btn, output,
                    timestamps_checkbox
                ]
            )

            # Toggle timestamps event
            timestamps_checkbox.change(
                fn=self.toggle_timestamps,
                inputs=[timestamps_checkbox],
                outputs=[output]
            )

            # Submission logic
            submit_btn.click(
                fn=run_transformation,
                inputs=[
                    language_selector,
                    input_type,
                    file_input,
                    text_input,
                    initial_prompt,
                    target_duration,
                    include_examples,
                ],
                outputs=output
            )

            # Example for PDF input. Registered only when the sample file is
            # present, so a missing optional asset cannot break the whole UI.
            logging.info("Setting up Gradio Examples...")
            if os.path.isfile(example_pdf):
                gr.Examples(
                    examples=[[example_pdf, "", "", 30, True]],
                    inputs=[file_input, text_input, initial_prompt, target_duration, include_examples]
                )
                logging.info("Gradio Examples configured.")
            else:
                logging.warning("Example PDF not found at %s; skipping Gradio Examples.", example_pdf)

        logging.info("Launching Gradio interface...")
        # Note: Setting share=True is not recommended/supported in Spaces, but kept for consistency with original code
        # It might generate a warning, which is expected.
        interface.launch(share=True)
        logging.info("Gradio interface launched.")

def main():
    """Script entry point: build the app and run its Gradio interface."""
    logging.info("Starting application...")
    TranscriptTransformerApp().launch()
    # Logged once launch() has returned.
    logging.info("Application finished.")


if __name__ == "__main__":
    main()