rogeliorichman committed on
Commit
d4af98c
·
verified ·
1 Parent(s): b2b4dfa

Upload folder using huggingface_hub

Browse files
Files changed (3) hide show
  1. README.md +16 -1
  2. src/app.py +235 -29
  3. src/core/transformer.py +244 -126
README.md CHANGED
@@ -10,11 +10,16 @@ sdk_version: 5.13.1
10
  [![Python 3.8+](https://img.shields.io/badge/python-3.8+-blue.svg)](https://www.python.org/downloads/)
11
  [![Code style: black](https://img.shields.io/badge/code%20style-black-000000.svg)](https://github.com/psf/black)
12
  [![PRs Welcome](https://img.shields.io/badge/PRs-welcome-brightgreen.svg)](http://makeapullrequest.com)
 
13
 
14
  > Transform transcripts and PDFs into timed, structured teaching scripts using AI
15
 
16
  AI Script Generator is an advanced AI system that converts PDF transcripts, raw text, and conversational content into well-structured teaching scripts. It seamlessly processes inputs, extracting and analyzing the content to create organized, pedagogically sound scripts with time markers. Designed for educators, students, content creators, and anyone looking to transform information into clear explanations.
17
 
 
 
 
 
18
  ## ✨ Features
19
 
20
  - 🤖 PDF transcript and raw text processing
@@ -23,6 +28,8 @@ AI Script Generator is an advanced AI system that converts PDF transcripts, raw
23
  - 🔄 Coherent topic organization
24
  - 🔌 Support for multiple AI providers (Gemini/OpenAI)
25
  - ⏱️ Time-marked sections for pacing
 
 
26
 
27
  ## Output Format
28
 
@@ -220,8 +227,16 @@ Project Link: [https://github.com/RogelioRichmanAstronaut/AI-Script-Generator](h
220
  - [ ] Support for multiple output formats (PDF, PPTX)
221
  - [ ] Interactive elements generation
222
  - [ ] Custom templating system
223
- - [ ] Multi-language support
 
 
 
 
 
 
 
224
  - [ ] Integration with LMS platforms
 
225
 
226
  ---
227
 
 
10
  [![Python 3.8+](https://img.shields.io/badge/python-3.8+-blue.svg)](https://www.python.org/downloads/)
11
  [![Code style: black](https://img.shields.io/badge/code%20style-black-000000.svg)](https://github.com/psf/black)
12
  [![PRs Welcome](https://img.shields.io/badge/PRs-welcome-brightgreen.svg)](http://makeapullrequest.com)
13
+ [![Hugging Face Spaces](https://img.shields.io/badge/%F0%9F%A4%97%20Hugging%20Face-Spaces-blue)](https://huggingface.co/spaces/rogeliorichman/AI_Script_Generator)
14
 
15
  > Transform transcripts and PDFs into timed, structured teaching scripts using AI
16
 
17
  AI Script Generator is an advanced AI system that converts PDF transcripts, raw text, and conversational content into well-structured teaching scripts. It seamlessly processes inputs, extracting and analyzing the content to create organized, pedagogically sound scripts with time markers. Designed for educators, students, content creators, and anyone looking to transform information into clear explanations.
18
 
19
+ ## 🔗 Live Demo
20
+
21
+ Try it out: [AI Script Generator on Hugging Face Spaces](https://huggingface.co/spaces/rogeliorichman/AI_Script_Generator)
22
+
23
  ## ✨ Features
24
 
25
  - 🤖 PDF transcript and raw text processing
 
28
  - 🔄 Coherent topic organization
29
  - 🔌 Support for multiple AI providers (Gemini/OpenAI)
30
  - ⏱️ Time-marked sections for pacing
31
+ - 🌐 Multilingual interface (English/Spanish) with flag selector
32
+ - 🌍 Generation in ANY language through the guiding prompt (not limited to UI languages)
33
 
34
  ## Output Format
35
 
 
227
  - [ ] Support for multiple output formats (PDF, PPTX)
228
  - [ ] Interactive elements generation
229
  - [ ] Custom templating system
230
+ - [ ] Copy to clipboard button for generated content
231
+ - [x] Multilingual capabilities
232
+ - [x] Content generation in any language via guiding prompt
233
+ - [x] UI language support
234
+ - [x] English
235
+ - [x] Spanish
236
+ - [ ] French
237
+ - [ ] German
238
  - [ ] Integration with LMS platforms
239
+ - [x] Timestamp toggle - ability to show/hide time markers in the output text
240
 
241
  ---
242
 
src/app.py CHANGED
@@ -1,5 +1,6 @@
1
  import os
2
  import gradio as gr
 
3
  from dotenv import load_dotenv
4
  from src.core.transformer import TranscriptTransformer
5
  from src.utils.pdf_processor import PDFProcessor
@@ -7,12 +8,73 @@ from src.utils.text_processor import TextProcessor
7
 
8
  load_dotenv()
9
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
10
  class TranscriptTransformerApp:
11
  def __init__(self):
12
  self.pdf_processor = PDFProcessor()
13
  self.text_processor = TextProcessor()
 
 
 
 
14
 
15
  def process_transcript(self,
 
16
  input_type: str,
17
  file_obj: gr.File = None,
18
  raw_text_input: str = "",
@@ -25,6 +87,7 @@ class TranscriptTransformerApp:
25
  Process uploaded transcript and transform it into a teaching transcript
26
 
27
  Args:
 
28
  input_type: Type of input (PDF or Raw Text)
29
  file_obj: Uploaded PDF file (if input_type is PDF)
30
  raw_text_input: Raw text input (if input_type is Raw Text)
@@ -48,70 +111,146 @@ class TranscriptTransformerApp:
48
  )
49
 
50
  # Get text based on input type
51
- if input_type == "PDF":
52
  if file_obj is None:
53
- return "Error: No PDF file uploaded"
54
  raw_text = self.pdf_processor.extract_text(file_obj.name)
55
  else: # Raw Text
56
  if not raw_text_input.strip():
57
- return "Error: No text provided"
58
  raw_text = raw_text_input
59
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
60
  # Transform to teaching transcript with user guidance
61
  lecture_transcript = self.transformer.transform_to_lecture(
62
  text=raw_text,
63
  target_duration=target_duration,
64
  include_examples=include_examples,
65
- initial_prompt=initial_prompt
66
  )
67
 
 
 
 
 
 
 
 
 
 
68
  return lecture_transcript
69
 
70
  except Exception as e:
71
- return f"Error processing transcript: {str(e)}"
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
72
 
73
  def launch(self):
74
  """Launch the Gradio interface"""
75
  # Get the path to the example PDF
76
  example_pdf = os.path.join(os.path.dirname(os.path.dirname(__file__)), "data", "sample2.pdf")
77
 
78
- with gr.Blocks(title="AI Script Generator") as interface:
79
- gr.Markdown("# AI Script Generator")
80
- gr.Markdown("Transform transcripts and PDFs into timed, structured teaching scripts using AI")
 
 
 
 
 
 
 
 
 
 
81
 
 
 
 
 
82
  with gr.Row():
83
  input_type = gr.Radio(
84
- choices=["PDF", "Raw Text"],
85
- label="Input Type",
86
- value="PDF"
87
  )
88
 
 
89
  with gr.Row():
90
  with gr.Column(visible=True) as pdf_column:
91
  file_input = gr.File(
92
- label="Upload Transcript (PDF)",
93
  file_types=[".pdf"]
94
  )
95
 
96
  with gr.Column(visible=False) as text_column:
97
  text_input = gr.Textbox(
98
- label="Paste Transcript Text",
99
  lines=10,
100
- placeholder="Paste your transcript text here..."
101
  )
102
 
 
103
  with gr.Row():
104
  initial_prompt = gr.Textbox(
105
- label="Guiding Prompt (Optional)",
106
  lines=3,
107
  value="",
108
- placeholder="Additional instructions to customize the output. Examples: 'Use a more informal tone', 'Focus only on section X', 'Generate the content in Spanish', 'Include more practical programming examples', etc.",
109
- info="The Guiding Prompt allows you to provide specific instructions to modify the generated content, like output/desired LANGUAGE. You can use it to change the tone, style, focus ONLY on specific sections of the text, specify the output language (e.g., 'Generate in Spanish/French/German'), or give any other instruction that helps personalize the final result."
110
  )
111
 
 
112
  with gr.Row():
113
  target_duration = gr.Number(
114
- label="Target Lecture Duration (minutes)",
115
  value=30,
116
  minimum=2,
117
  maximum=60,
@@ -119,40 +258,107 @@ class TranscriptTransformerApp:
119
  )
120
 
121
  include_examples = gr.Checkbox(
122
- label="Include Practical Examples",
123
  value=True
124
  )
125
 
126
  use_thinking_model = gr.Checkbox(
127
- label="Use Experimental Thinking Model (Gemini Only)",
128
  value=True
129
  )
130
 
 
131
  with gr.Row():
132
- submit_btn = gr.Button("Transform Transcript")
133
 
 
134
  output = gr.Textbox(
135
- label="Generated Teaching Transcript",
136
  lines=25
137
  )
138
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
139
  # Handle visibility of input columns based on selection
140
- def update_input_visibility(choice):
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
141
  return [
142
- gr.update(visible=(choice == "PDF")), # pdf_column
143
- gr.update(visible=(choice == "Raw Text")) # text_column
 
 
 
 
 
 
 
 
 
 
 
144
  ]
145
 
146
  input_type.change(
147
- fn=update_input_visibility,
148
- inputs=input_type,
149
  outputs=[pdf_column, text_column]
150
  )
151
 
152
- # Set up submission logic
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
153
  submit_btn.click(
154
- fn=self.process_transcript,
155
  inputs=[
 
156
  input_type,
157
  file_input,
158
  text_input,
 
1
  import os
2
  import gradio as gr
3
+ import re
4
  from dotenv import load_dotenv
5
  from src.core.transformer import TranscriptTransformer
6
  from src.utils.pdf_processor import PDFProcessor
 
8
 
9
  load_dotenv()
10
 
11
+ # Translations dictionary for UI elements
12
+ TRANSLATIONS = {
13
+ "en": {
14
+ "title": "AI Script Generator",
15
+ "subtitle": "Transform transcripts and PDFs into timed, structured teaching scripts using AI",
16
+ "input_type_label": "Input Type",
17
+ "input_type_options": ["PDF", "Raw Text"],
18
+ "upload_pdf_label": "Upload Transcript (PDF)",
19
+ "paste_text_label": "Paste Transcript Text",
20
+ "paste_text_placeholder": "Paste your transcript text here...",
21
+ "guiding_prompt_label": "Guiding Prompt (Optional)",
22
+ "guiding_prompt_placeholder": "Additional instructions to customize the output. Examples: 'Use a more informal tone', 'Focus only on section X', 'Generate the content in Spanish', 'Include more practical programming examples', etc.",
23
+ "guiding_prompt_info": "The Guiding Prompt allows you to provide specific instructions to modify the generated content, like output/desired LANGUAGE. You can use it to change the tone, style, focus ONLY on specific sections of the text, specify the output language (e.g., 'Generate in Spanish/French/German'), or give any other instruction that helps personalize the final result.",
24
+ "duration_label": "Target Lecture Duration (minutes)",
25
+ "examples_label": "Include Practical Examples",
26
+ "thinking_model_label": "Use Experimental Thinking Model (Gemini Only)",
27
+ "submit_button": "Transform Transcript",
28
+ "output_label": "Generated Teaching Transcript",
29
+ "error_no_pdf": "Error: No PDF file uploaded",
30
+ "error_no_text": "Error: No text provided",
31
+ "error_prefix": "Error processing transcript: ",
32
+ "language_selector": "Language / Idioma",
33
+ "show_timestamps": "Show Timestamps",
34
+ "hide_timestamps": "Hide Timestamps"
35
+ },
36
+ "es": {
37
+ "title": "Generador de Guiones IA",
38
+ "subtitle": "Transforma transcripciones y PDFs en guiones de enseñanza estructurados y cronometrados usando IA",
39
+ "input_type_label": "Tipo de Entrada",
40
+ "input_type_options": ["PDF", "Texto"],
41
+ "upload_pdf_label": "Subir Transcripción (PDF)",
42
+ "paste_text_label": "Pegar Texto de Transcripción",
43
+ "paste_text_placeholder": "Pega tu texto de transcripción aquí...",
44
+ "guiding_prompt_label": "Instrucciones Guía (Opcional)",
45
+ "guiding_prompt_placeholder": "Instrucciones adicionales para personalizar el resultado. Ejemplos: 'Usa un tono más informal', 'Enfócate solo en la sección X', 'Genera el contenido en inglés', 'Incluye más ejemplos prácticos de programación', etc.",
46
+ "guiding_prompt_info": "Las Instrucciones Guía te permiten proporcionar indicaciones específicas para modificar el contenido generado, como el IDIOMA deseado. Puedes usarlas para cambiar el tono, estilo, enfocarte SOLO en secciones específicas del texto, especificar el idioma de salida (ej., 'Generar en inglés/francés/alemán'), o dar cualquier otra instrucción que ayude a personalizar el resultado final.",
47
+ "duration_label": "Duración Objetivo de la Clase (minutos)",
48
+ "examples_label": "Incluir Ejemplos Prácticos",
49
+ "thinking_model_label": "Usar Modelo de Pensamiento Experimental (Solo Gemini)",
50
+ "submit_button": "Transformar Transcripción",
51
+ "output_label": "Guión de Enseñanza Generado",
52
+ "error_no_pdf": "Error: No se ha subido ningún archivo PDF",
53
+ "error_no_text": "Error: No se ha proporcionado texto",
54
+ "error_prefix": "Error al procesar la transcripción: ",
55
+ "language_selector": "Language / Idioma",
56
+ "show_timestamps": "Mostrar Marcas de Tiempo",
57
+ "hide_timestamps": "Ocultar Marcas de Tiempo"
58
+ }
59
+ }
60
+
61
+ # Language-specific prompt suffixes to append automatically
62
+ LANGUAGE_PROMPTS = {
63
+ "en": "", # Default language doesn't need special instructions
64
+ "es": "Generate the content in Spanish. Genera todo el contenido en español."
65
+ }
66
+
67
  class TranscriptTransformerApp:
68
  def __init__(self):
69
  self.pdf_processor = PDFProcessor()
70
  self.text_processor = TextProcessor()
71
+ self.current_language = "en" # Default language
72
+ self.last_generated_content = "" # Store the last generated content
73
+ self.content_with_timestamps = "" # Store content with timestamps
74
+ self.content_without_timestamps = "" # Store content without timestamps
75
 
76
  def process_transcript(self,
77
+ language: str,
78
  input_type: str,
79
  file_obj: gr.File = None,
80
  raw_text_input: str = "",
 
87
  Process uploaded transcript and transform it into a teaching transcript
88
 
89
  Args:
90
+ language: Selected UI language
91
  input_type: Type of input (PDF or Raw Text)
92
  file_obj: Uploaded PDF file (if input_type is PDF)
93
  raw_text_input: Raw text input (if input_type is Raw Text)
 
111
  )
112
 
113
  # Get text based on input type
114
+ if input_type == TRANSLATIONS[language]["input_type_options"][0]: # PDF
115
  if file_obj is None:
116
+ return TRANSLATIONS[language]["error_no_pdf"]
117
  raw_text = self.pdf_processor.extract_text(file_obj.name)
118
  else: # Raw Text
119
  if not raw_text_input.strip():
120
+ return TRANSLATIONS[language]["error_no_text"]
121
  raw_text = raw_text_input
122
 
123
+ # Modify initial prompt based on language if no explicit language instruction is given
124
+ modified_prompt = initial_prompt
125
+
126
+ # Check if user has specified a language in the prompt
127
+ language_keywords = ["spanish", "español", "english", "inglés", "french", "francés", "german", "alemán"]
128
+ user_specified_language = any(keyword in initial_prompt.lower() for keyword in language_keywords)
129
+
130
+ # Only append language instruction if user hasn't specified one and we have a non-default language
131
+ if not user_specified_language and language in LANGUAGE_PROMPTS and LANGUAGE_PROMPTS[language]:
132
+ if modified_prompt:
133
+ modified_prompt += " " + LANGUAGE_PROMPTS[language]
134
+ else:
135
+ modified_prompt = LANGUAGE_PROMPTS[language]
136
+
137
  # Transform to teaching transcript with user guidance
138
  lecture_transcript = self.transformer.transform_to_lecture(
139
  text=raw_text,
140
  target_duration=target_duration,
141
  include_examples=include_examples,
142
+ initial_prompt=modified_prompt
143
  )
144
 
145
+ # Store the generated content
146
+ self.content_with_timestamps = lecture_transcript
147
+
148
+ # Create a version without timestamps
149
+ self.content_without_timestamps = self.remove_timestamps(lecture_transcript)
150
+
151
+ # Default: show content with timestamps
152
+ self.last_generated_content = lecture_transcript
153
+
154
  return lecture_transcript
155
 
156
  except Exception as e:
157
+ return f"{TRANSLATIONS[language]['error_prefix']}{str(e)}"
158
+
159
+ def remove_timestamps(self, text):
160
+ """Remove all timestamps (e.g., [00:00]) from the text"""
161
+ # Regex to match the timestamp pattern [MM:SS] or [HH:MM:SS]
162
+ return re.sub(r'\[\d{1,2}:\d{2}(:\d{2})?\]', '', text)
163
+
164
+ def toggle_timestamps(self, show_timestamps):
165
+ """Toggle visibility of timestamps in output"""
166
+ if show_timestamps:
167
+ return self.content_with_timestamps
168
+ else:
169
+ return self.content_without_timestamps
170
+
171
+ def update_ui_language(self, language):
172
+ """Update UI elements based on selected language"""
173
+ self.current_language = language
174
+
175
+ translations = TRANSLATIONS[language]
176
+
177
+ return [
178
+ translations["title"],
179
+ translations["subtitle"],
180
+ translations["input_type_label"],
181
+ gr.update(choices=translations["input_type_options"], value=translations["input_type_options"][0]),
182
+ translations["upload_pdf_label"],
183
+ translations["paste_text_label"],
184
+ translations["paste_text_placeholder"],
185
+ translations["guiding_prompt_label"],
186
+ translations["guiding_prompt_placeholder"],
187
+ translations["guiding_prompt_info"],
188
+ translations["duration_label"],
189
+ translations["examples_label"],
190
+ translations["thinking_model_label"],
191
+ translations["submit_button"],
192
+ translations["output_label"]
193
+ ]
194
 
195
  def launch(self):
196
  """Launch the Gradio interface"""
197
  # Get the path to the example PDF
198
  example_pdf = os.path.join(os.path.dirname(os.path.dirname(__file__)), "data", "sample2.pdf")
199
 
200
+ with gr.Blocks(title=TRANSLATIONS["en"]["title"]) as interface:
201
+ # Header with title and language selector side by side
202
+ with gr.Row():
203
+ with gr.Column(scale=4):
204
+ title_md = gr.Markdown("# " + TRANSLATIONS["en"]["title"])
205
+ with gr.Column(scale=1):
206
+ language_selector = gr.Dropdown(
207
+ choices=["🇺🇸 English", "🇪🇸 Español"],
208
+ value="🇺🇸 English",
209
+ label=TRANSLATIONS["en"]["language_selector"],
210
+ elem_id="language-selector",
211
+ interactive=True
212
+ )
213
 
214
+ # Subtitle
215
+ subtitle_md = gr.Markdown(TRANSLATIONS["en"]["subtitle"])
216
+
217
+ # Input type row
218
  with gr.Row():
219
  input_type = gr.Radio(
220
+ choices=TRANSLATIONS["en"]["input_type_options"],
221
+ label=TRANSLATIONS["en"]["input_type_label"],
222
+ value=TRANSLATIONS["en"]["input_type_options"][0]
223
  )
224
 
225
+ # File/text input columns
226
  with gr.Row():
227
  with gr.Column(visible=True) as pdf_column:
228
  file_input = gr.File(
229
+ label=TRANSLATIONS["en"]["upload_pdf_label"],
230
  file_types=[".pdf"]
231
  )
232
 
233
  with gr.Column(visible=False) as text_column:
234
  text_input = gr.Textbox(
235
+ label=TRANSLATIONS["en"]["paste_text_label"],
236
  lines=10,
237
+ placeholder=TRANSLATIONS["en"]["paste_text_placeholder"]
238
  )
239
 
240
+ # Guiding prompt
241
  with gr.Row():
242
  initial_prompt = gr.Textbox(
243
+ label=TRANSLATIONS["en"]["guiding_prompt_label"],
244
  lines=3,
245
  value="",
246
+ placeholder=TRANSLATIONS["en"]["guiding_prompt_placeholder"],
247
+ info=TRANSLATIONS["en"]["guiding_prompt_info"]
248
  )
249
 
250
+ # Settings row
251
  with gr.Row():
252
  target_duration = gr.Number(
253
+ label=TRANSLATIONS["en"]["duration_label"],
254
  value=30,
255
  minimum=2,
256
  maximum=60,
 
258
  )
259
 
260
  include_examples = gr.Checkbox(
261
+ label=TRANSLATIONS["en"]["examples_label"],
262
  value=True
263
  )
264
 
265
  use_thinking_model = gr.Checkbox(
266
+ label=TRANSLATIONS["en"]["thinking_model_label"],
267
  value=True
268
  )
269
 
270
+ # Submit button
271
  with gr.Row():
272
+ submit_btn = gr.Button(TRANSLATIONS["en"]["submit_button"])
273
 
274
+ # Output area
275
  output = gr.Textbox(
276
+ label=TRANSLATIONS["en"]["output_label"],
277
  lines=25
278
  )
279
 
280
+ # Toggle timestamps button and Copy button
281
+ with gr.Row():
282
+ timestamps_checkbox = gr.Checkbox(
283
+ label=TRANSLATIONS["en"]["show_timestamps"],
284
+ value=True,
285
+ interactive=True
286
+ )
287
+
288
+ # Map language dropdown values to language codes
289
+ lang_map = {
290
+ "🇺🇸 English": "en",
291
+ "🇪🇸 Español": "es"
292
+ }
293
+
294
  # Handle visibility of input columns based on selection
295
+ def update_input_visibility(language_display, choice):
296
+ language = lang_map.get(language_display, "en")
297
+ return [
298
+ gr.update(visible=(choice == TRANSLATIONS[language]["input_type_options"][0])), # pdf_column
299
+ gr.update(visible=(choice == TRANSLATIONS[language]["input_type_options"][1])) # text_column
300
+ ]
301
+
302
+ # Get language code from display value
303
+ def get_language_code(language_display):
304
+ return lang_map.get(language_display, "en")
305
+
306
+ # Update UI elements when language changes
307
+ def update_ui_with_display(language_display):
308
+ language = get_language_code(language_display)
309
+ self.current_language = language
310
+
311
+ translations = TRANSLATIONS[language]
312
+
313
  return [
314
+ "# " + translations["title"], # Title with markdown formatting
315
+ translations["subtitle"],
316
+ translations["input_type_label"],
317
+ gr.update(choices=translations["input_type_options"], value=translations["input_type_options"][0], label=translations["input_type_label"]),
318
+ gr.update(label=translations["upload_pdf_label"]),
319
+ gr.update(label=translations["paste_text_label"], placeholder=translations["paste_text_placeholder"]),
320
+ gr.update(label=translations["guiding_prompt_label"], placeholder=translations["guiding_prompt_placeholder"], info=translations["guiding_prompt_info"]),
321
+ gr.update(label=translations["duration_label"]),
322
+ gr.update(label=translations["examples_label"]),
323
+ gr.update(label=translations["thinking_model_label"]),
324
+ translations["submit_button"],
325
+ gr.update(label=translations["output_label"]),
326
+ gr.update(label=translations["show_timestamps"])
327
  ]
328
 
329
  input_type.change(
330
+ fn=lambda lang_display, choice: update_input_visibility(lang_display, choice),
331
+ inputs=[language_selector, input_type],
332
  outputs=[pdf_column, text_column]
333
  )
334
 
335
+ # Language change event
336
+ language_selector.change(
337
+ fn=update_ui_with_display,
338
+ inputs=language_selector,
339
+ outputs=[
340
+ title_md, subtitle_md,
341
+ input_type, input_type,
342
+ file_input, text_input,
343
+ initial_prompt,
344
+ target_duration, include_examples, use_thinking_model,
345
+ submit_btn, output,
346
+ timestamps_checkbox
347
+ ]
348
+ )
349
+
350
+ # Toggle timestamps event
351
+ timestamps_checkbox.change(
352
+ fn=self.toggle_timestamps,
353
+ inputs=[timestamps_checkbox],
354
+ outputs=[output]
355
+ )
356
+
357
+ # Set up submission logic with language code conversion
358
  submit_btn.click(
359
+ fn=lambda lang_display, *args: self.process_transcript(get_language_code(lang_display), *args),
360
  inputs=[
361
+ language_selector,
362
  input_type,
363
  file_input,
364
  text_input,
src/core/transformer.py CHANGED
@@ -1,7 +1,8 @@
1
  import os
2
  import logging
3
  import json
4
- from typing import List, Dict, Optional
 
5
  import openai
6
  from src.utils.text_processor import TextProcessor
7
 
@@ -16,7 +17,9 @@ class WordCountError(Exception):
16
  class TranscriptTransformer:
17
  """Transforms conversational transcripts into teaching material using LLM"""
18
 
19
- MAX_RETRIES = 3 # Maximum retries for content generation
 
 
20
  CHUNK_SIZE = 6000 # Target words per chunk
21
  LARGE_DEVIATION_THRESHOLD = 0.20 # 20% maximum deviation
22
  MAX_TOKENS = 64000 # New absolute limit based on 64k output tokens
@@ -54,6 +57,49 @@ class TranscriptTransformer:
54
  # Target word counts
55
  self.words_per_minute = 130 # Average speaking rate
56
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
57
  def _validate_word_count(self, total_words: int, target_words: int, min_words: int, max_words: int) -> None:
58
  """Validate word count with flexible thresholds and log warnings/errors"""
59
  deviation = abs(total_words - target_words) / target_words
@@ -280,7 +326,11 @@ class TranscriptTransformer:
280
  }
281
  }
282
 
283
- response = self.openai_client.chat.completions.create(**params)
 
 
 
 
284
  content = response.choices[0].message.content.strip()
285
  logger.debug(f"Raw structure response: {content}")
286
 
@@ -308,86 +358,94 @@ class TranscriptTransformer:
308
 
309
  except Exception as e:
310
  logger.error(f"Error generating structure: {str(e)}")
 
311
  return self._generate_fallback_structure(text, target_duration)
312
 
313
  def _generate_fallback_structure(self, text: str, target_duration: int) -> Dict:
314
- """Generate a basic fallback structure when JSON parsing fails"""
315
  logger.info("Generating fallback structure")
316
 
317
- # Generate a simpler structure prompt
318
- prompt = f"""
319
- Analyze this text and provide:
320
- 1. A title (one line)
321
- 2. Three learning objectives (one per line)
322
- 3. Three main topics (one per line)
323
- 4. Three key terms (one per line)
324
-
325
- Text: {text[:1000]}
326
- """
 
 
 
 
 
 
 
 
 
 
 
 
327
 
328
  try:
329
- response = self.openai_client.chat.completions.create(
330
- model=self.model_name,
331
- messages=[
332
- {"role": "system", "content": "You are an expert educator. Provide concise, line-by-line responses."},
333
- {"role": "user", "content": prompt}
334
- ],
335
- temperature=0.7,
336
- max_tokens=1000
337
- )
338
-
339
- lines = response.choices[0].message.content.strip().split('\n')
340
- lines = [line.strip() for line in lines if line.strip()]
341
-
342
- # Extract components from lines
343
- title = lines[0] if lines else "Lecture"
344
- objectives = [obj for obj in lines[1:4] if obj][:3]
345
- topics = [topic for topic in lines[4:7] if topic][:3]
346
- terms = [term for term in lines[7:10] if term][:3]
347
-
348
- # Calculate minutes per topic
349
- main_time = int(target_duration * 0.7) # 70% for main content
350
- topic_minutes = main_time // len(topics) if topics else main_time
351
-
352
- # Create fallback structure
353
- return {
354
- "title": title,
355
- "learning_objectives": objectives,
356
- "topics": [
357
- {
358
- "title": topic,
359
- "key_concepts": [topic], # Use topic as key concept
360
- "subtopics": ["Overview", "Details", "Examples"],
361
- "duration_minutes": topic_minutes,
362
- "objective_links": [1] # Link to first objective
363
- }
364
- for topic in topics
365
- ],
366
- "practical_applications": [
367
- "Real-world application example",
368
- "Interactive exercise",
369
- "Case study"
370
- ],
371
- "key_terms": terms
372
- }
373
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
374
  except Exception as e:
375
  logger.error(f"Error generating fallback structure: {str(e)}")
376
- # Return minimal valid structure
377
  return {
378
- "title": "Lecture Overview",
379
- "learning_objectives": ["Understand key concepts", "Apply knowledge", "Analyze examples"],
380
  "topics": [
381
  {
382
- "title": "Main Topic",
383
- "key_concepts": ["Core concept"],
384
- "subtopics": ["Overview"],
385
  "duration_minutes": target_duration // 2,
386
- "objective_links": [1]
 
 
 
 
 
 
 
387
  }
388
  ],
389
- "practical_applications": ["Practical example"],
390
- "key_terms": ["Key term"]
391
  }
392
 
393
  def _generate_section(self,
@@ -400,24 +458,40 @@ class TranscriptTransformer:
400
  is_first: bool = False,
401
  is_last: bool = False,
402
  initial_prompt: Optional[str] = None) -> str:
403
- """Generate content for a specific section with coherence tracking"""
404
  logger.info(f"Generating {section_type} section (target: {target_words} words)")
405
 
406
- user_instructions = f"\nUser's guiding instructions:\n{initial_prompt}\n" if initial_prompt else ""
 
 
 
 
 
 
 
 
 
 
 
 
407
 
408
- # Base prompt with structure
409
  prompt = f"""
410
- You are an expert educator creating a detailed lecture transcript.
411
  {user_instructions}
412
- Generate the {section_type} section with EXACTLY {target_words} words.
 
 
 
413
 
414
- Lecture Title: {structure_data['title']}
415
- Learning Objectives: {', '.join(structure_data['learning_objectives'])}
416
 
417
- Current section purpose:
 
418
  """
419
 
420
- # Add section-specific guidance
421
  if section_type == 'introduction':
422
  prompt += """
423
  - Start with an engaging hook
@@ -427,66 +501,110 @@ class TranscriptTransformer:
427
  """
428
  elif section_type == 'main':
429
  prompt += f"""
430
- - Cover these topics: {[t['title'] for t in structure_data['topics']]}
431
- - Build progressively on concepts
432
- - Include clear transitions
433
- - Reference previous concepts
 
 
 
 
 
 
 
434
  """
435
  elif section_type == 'practical':
436
- prompt += """
437
- - Apply concepts to real-world scenarios
438
- - Connect to previous topics
439
- - Include interactive elements
440
- - Reinforce key learning points
 
 
 
 
 
441
  """
442
  elif section_type == 'summary':
443
  prompt += """
444
- - Reinforce key takeaways
445
- - Connect back to objectives
446
- - Provide next steps
447
- - End with a strong conclusion
448
- """
449
 
450
- # Add context if available
 
 
 
 
 
 
451
  if context:
452
  prompt += f"""
453
 
454
- Context:
455
- - Covered topics: {', '.join(context['covered_topics'])}
456
- - Pending topics: {', '.join(context['pending_topics'])}
457
- - Key terms used: {', '.join(context['key_terms'])}
458
- - Recent narrative: {context['current_narrative']}
459
- """
460
 
461
- # Add requirements
462
- prompt += f"""
463
-
464
- Requirements:
465
- 1. STRICT word count: Generate EXACTLY {target_words} words
466
- 2. Include practical examples: {include_examples}
467
- 3. Use clear transitions
468
- 4. Include engagement points
469
- 5. Use time markers [MM:SS]
470
- 6. Reference specific content from transcript
471
- 7. Maintain narrative flow
472
- 8. Use key terms consistently
473
- """
474
-
475
- response = self.openai_client.chat.completions.create(
476
- model=self.model_name,
477
- messages=[
478
- {"role": "system", "content": "You are an expert educator creating a coherent lecture transcript."},
479
- {"role": "user", "content": prompt}
480
- ],
481
- temperature=0.7,
482
- max_tokens=self._calculate_max_tokens(section_type, target_words)
483
- )
484
-
485
- content = response.choices[0].message.content
486
- word_count = self.text_processor.count_words(content)
487
- logger.info(f"Section generated: {word_count} words")
488
 
489
- return content
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
490
 
491
  def _calculate_max_tokens(self, section_type: str, target_words: int) -> int:
492
  """Calculate appropriate max_tokens based on section and model"""
@@ -536,7 +654,7 @@ class TranscriptTransformer:
536
  topic_target = topic_words[topic['title']]
537
 
538
  # Update context for topic
539
- context['current_topic'] = topic['title']
540
  if topic['title'] in context['pending_topics']:
541
  context['covered_topics'].append(topic['title'])
542
  context['pending_topics'].remove(topic['title'])
 
1
  import os
2
  import logging
3
  import json
4
+ import time
5
+ from typing import List, Dict, Optional, Callable, Any
6
  import openai
7
  from src.utils.text_processor import TextProcessor
8
 
 
17
  class TranscriptTransformer:
18
  """Transforms conversational transcripts into teaching material using LLM"""
19
 
20
+ MAX_RETRIES = 3 # Initial retries for content generation
21
+ EXTENDED_RETRIES = 3 # Additional retries with longer waits
22
+ EXTENDED_RETRY_DELAYS = [5, 10, 15] # Wait times in seconds for extended retries
23
  CHUNK_SIZE = 6000 # Target words per chunk
24
  LARGE_DEVIATION_THRESHOLD = 0.20 # 20% maximum deviation
25
  MAX_TOKENS = 64000 # Nuevo límite absoluto basado en 64k tokens de salida
 
57
  # Target word counts
58
  self.words_per_minute = 130 # Average speaking rate
59
 
60
+ def _api_call_with_enhanced_retries(self, call_func: Callable[[], Any]) -> Any:
61
+ """
62
+ Wrapper function for API calls with enhanced retry logic
63
+
64
+ Args:
65
+ call_func: Function that makes the actual API call
66
+
67
+ Returns:
68
+ The result of the successful API call
69
+
70
+ Raises:
71
+ Exception: If all retries fail
72
+ """
73
+ # Initial retries (already handled by openai client)
74
+ try:
75
+ return call_func()
76
+ except Exception as e:
77
+ error_str = str(e)
78
+
79
+ # Check if it's a quota error (429)
80
+ if "429" in error_str or "Too Many Requests" in error_str or "RESOURCE_EXHAUSTED" in error_str:
81
+ logger.warning(f"Quota error detected: {error_str}")
82
+ logger.info(f"Starting extended retries with longer waits...")
83
+
84
+ # Extended retries with longer waits
85
+ for i in range(self.EXTENDED_RETRIES):
86
+ wait_time = self.EXTENDED_RETRY_DELAYS[i]
87
+ logger.info(f"Extended retry {i+1}/{self.EXTENDED_RETRIES}: Waiting {wait_time} seconds before retry")
88
+ time.sleep(wait_time)
89
+
90
+ try:
91
+ return call_func()
92
+ except Exception as retry_error:
93
+ # If last retry, re-raise
94
+ if i == self.EXTENDED_RETRIES - 1:
95
+ logger.error(f"All extended retries failed: {str(retry_error)}")
96
+ raise
97
+ # Otherwise log and continue to next retry
98
+ logger.warning(f"Extended retry {i+1} failed: {str(retry_error)}")
99
+ else:
100
+ # Not a quota error, re-raise
101
+ raise
102
+
103
  def _validate_word_count(self, total_words: int, target_words: int, min_words: int, max_words: int) -> None:
104
  """Validate word count with flexible thresholds and log warnings/errors"""
105
  deviation = abs(total_words - target_words) / target_words
 
326
  }
327
  }
328
 
329
+ # Use the enhanced retry wrapper for API call
330
+ def api_call():
331
+ return self.openai_client.chat.completions.create(**params)
332
+
333
+ response = self._api_call_with_enhanced_retries(api_call)
334
  content = response.choices[0].message.content.strip()
335
  logger.debug(f"Raw structure response: {content}")
336
 
 
358
 
359
  except Exception as e:
360
  logger.error(f"Error generating structure: {str(e)}")
361
+ # Fallback in case of any error
362
  return self._generate_fallback_structure(text, target_duration)
363
 
364
  def _generate_fallback_structure(self, text: str, target_duration: int) -> Dict:
365
+ """Generate a simplified fallback structure in case of parsing failures"""
366
  logger.info("Generating fallback structure")
367
 
368
+ params = {
369
+ "model": self.model_name,
370
+ "messages": [
371
+ {"role": "system", "content": "You are an expert educator. Output ONLY valid JSON, no other text."},
372
+ {"role": "user", "content": f"""
373
+ Create a simplified lecture outline based on this transcript.
374
+ Format as JSON with:
375
+ - title
376
+ - 3 learning objectives
377
+ - 2 main topics with title, key concepts, subtopics
378
+ - 2 practical applications
379
+ - 3 key terms
380
+
381
+ Target duration: {target_duration} minutes
382
+
383
+ Transcript excerpt:
384
+ {text[:2000]}
385
+ """}
386
+ ],
387
+ "temperature": 0.5,
388
+ "max_tokens": 2000
389
+ }
390
 
391
  try:
392
+ # Use the enhanced retry wrapper for API call
393
+ def api_call():
394
+ return self.openai_client.chat.completions.create(**params)
395
+
396
+ response = self._api_call_with_enhanced_retries(api_call)
397
+ content = response.choices[0].message.content.strip()
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
398
 
399
+ try:
400
+ return json.loads(content)
401
+ except json.JSONDecodeError:
402
+ # Last resort fallback if everything fails
403
+ return {
404
+ "title": "Lecture on Transcript Topic",
405
+ "learning_objectives": ["Understand key concepts", "Apply knowledge", "Evaluate outcomes"],
406
+ "topics": [
407
+ {
408
+ "title": "Main Topic 1",
409
+ "key_concepts": ["Concept 1", "Concept 2"],
410
+ "subtopics": ["Subtopic 1", "Subtopic 2"],
411
+ "duration_minutes": target_duration // 2,
412
+ "objective_links": [1, 2]
413
+ },
414
+ {
415
+ "title": "Main Topic 2",
416
+ "key_concepts": ["Concept 3", "Concept 4"],
417
+ "subtopics": ["Subtopic 3", "Subtopic 4"],
418
+ "duration_minutes": target_duration // 2,
419
+ "objective_links": [2, 3]
420
+ }
421
+ ],
422
+ "practical_applications": ["Application 1", "Application 2"],
423
+ "key_terms": ["Term 1", "Term 2", "Term 3"]
424
+ }
425
  except Exception as e:
426
  logger.error(f"Error generating fallback structure: {str(e)}")
427
+ # Hardcoded last resort fallback
428
  return {
429
+ "title": "Lecture on Transcript Topic",
430
+ "learning_objectives": ["Understand key concepts", "Apply knowledge", "Evaluate outcomes"],
431
  "topics": [
432
  {
433
+ "title": "Main Topic 1",
434
+ "key_concepts": ["Concept 1", "Concept 2"],
435
+ "subtopics": ["Subtopic 1", "Subtopic 2"],
436
  "duration_minutes": target_duration // 2,
437
+ "objective_links": [1, 2]
438
+ },
439
+ {
440
+ "title": "Main Topic 2",
441
+ "key_concepts": ["Concept 3", "Concept 4"],
442
+ "subtopics": ["Subtopic 3", "Subtopic 4"],
443
+ "duration_minutes": target_duration // 2,
444
+ "objective_links": [2, 3]
445
  }
446
  ],
447
+ "practical_applications": ["Application 1", "Application 2"],
448
+ "key_terms": ["Term 1", "Term 2", "Term 3"]
449
  }
450
 
451
  def _generate_section(self,
 
458
  is_first: bool = False,
459
  is_last: bool = False,
460
  initial_prompt: Optional[str] = None) -> str:
461
+ """Generate a specific section of the lecture"""
462
  logger.info(f"Generating {section_type} section (target: {target_words} words)")
463
 
464
+ # Calculate timing markers
465
+ if section_type == 'introduction':
466
+ time_marker = '[00:00]'
467
+ elif section_type == 'summary':
468
+ duration_mins = sum(topic.get('duration_minutes', 5) for topic in structure_data['topics'])
469
+ # Ensure the summary time marker is an integer number of minutes and never less than 5
470
+ adjusted_mins = max(5, int(duration_mins - 5))
471
+ time_marker = f'[{adjusted_mins:02d}:00]'
472
+ else:
473
+ # For other sections, use appropriate time markers
474
+ time_marker = '[XX:XX]' # Will be replaced within the prompt
475
+
476
+ user_instructions = f"\nAdditional user instructions:\n{initial_prompt}\n" if initial_prompt else ""
477
 
478
+ # Base prompt with context-specific formatting
479
  prompt = f"""
480
+ You are creating a {section_type} section for a {time_marker} teaching lecture on "{structure_data['title']}".
481
  {user_instructions}
482
+ Target word count: {target_words} words (very important)
483
+
484
+ Learning objectives:
485
+ {', '.join(structure_data['learning_objectives'])}
486
 
487
+ Key terms:
488
+ {', '.join(structure_data['key_terms'])}
489
 
490
+ Original source:
491
+ {original_text[:500]}...
492
  """
493
 
494
+ # Section-specific instructions
495
  if section_type == 'introduction':
496
  prompt += """
497
  - Start with an engaging hook
 
501
  """
502
  elif section_type == 'main':
503
  prompt += f"""
504
+ Discuss one main topic in depth.
505
+
506
+ Topic: {context['current_topic']['title']}
507
+ Key concepts: {', '.join(context['current_topic']['key_concepts'])}
508
+ Subtopics: {', '.join(context['current_topic']['subtopics'])}
509
+
510
+ - Start with appropriate time marker
511
+ - Explain key concepts clearly
512
+ - Include real-world examples
513
+ - Connect to learning objectives
514
+ - Use appropriate time markers within the section
515
  """
516
  elif section_type == 'practical':
517
+ prompt += f"""
518
+ Create a practical applications section with:
519
+
520
+ - Start with appropriate time marker
521
+ - 2-3 practical examples or case studies
522
+ - Clear connections to the main topics
523
+ - Interactive elements (questions, exercises)
524
+
525
+ Practical applications to cover:
526
+ {', '.join(structure_data['practical_applications'])}
527
  """
528
  elif section_type == 'summary':
529
  prompt += """
530
+ Create a concise summary:
 
 
 
 
531
 
532
+ - Start with appropriate time marker
533
+ - Reinforce key learning points
534
+ - Brief recap of main topics
535
+ - Call to action or follow-up suggestions
536
+ """
537
+
538
+ # Context-specific content
539
  if context:
540
  prompt += f"""
541
 
542
+ Previously covered topics:
543
+ {', '.join(context['covered_topics'])}
 
 
 
 
544
 
545
+ Pending topics:
546
+ {', '.join(context['pending_topics'])}
547
+
548
+ Recent narrative context:
549
+ {context['current_narrative']}
550
+ """
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
551
 
552
+ # First/last section specific instructions
553
+ if is_first:
554
+ prompt += """
555
+
556
+ This is the FIRST section of the lecture. Make it engaging and set the tone.
557
+ """
558
+ elif is_last:
559
+ prompt += """
560
+
561
+ This is the FINAL section of the lecture. Ensure proper closure and reinforcement.
562
+ """
563
+
564
+ # Add section-specific time markers for formatted output
565
+ if section_type != 'introduction':
566
+ prompt += """
567
+
568
+ IMPORTANT: Include appropriate time markers [MM:SS] throughout the section.
569
+ """
570
+
571
+ try:
572
+ # Prepare API call parameters
573
+ params = {
574
+ "model": self.model_name,
575
+ "messages": [
576
+ {"role": "system", "content": "You are an expert educator creating a teaching script."},
577
+ {"role": "user", "content": prompt}
578
+ ],
579
+ "temperature": 0.7,
580
+ "max_tokens": self._calculate_max_tokens(section_type, target_words)
581
+ }
582
+
583
+ # Add thinking config if using experimental model
584
+ if self.use_thinking_model:
585
+ params["extra_body"] = {
586
+ "thinking_config": {
587
+ "include_thoughts": True
588
+ }
589
+ }
590
+
591
+ # Use the enhanced retry wrapper for API call
592
+ def api_call():
593
+ return self.openai_client.chat.completions.create(**params)
594
+
595
+ response = self._api_call_with_enhanced_retries(api_call)
596
+ content = response.choices[0].message.content.strip()
597
+
598
+ # Validate output length
599
+ content_words = self.text_processor.count_words(content)
600
+ logger.info(f"Section generated: {content_words} words")
601
+
602
+ return content
603
+
604
+ except Exception as e:
605
+ logger.error(f"Error during content generation: {str(e)}")
606
+ # Provide a minimal fallback content to avoid complete failure
607
+ return f"{time_marker} {section_type.capitalize()} (Error during generation)\n\nWe apologize, but there was an error generating this section."
608
 
609
  def _calculate_max_tokens(self, section_type: str, target_words: int) -> int:
610
  """Calculate appropriate max_tokens based on section and model"""
 
654
  topic_target = topic_words[topic['title']]
655
 
656
  # Update context for topic
657
+ context['current_topic'] = topic
658
  if topic['title'] in context['pending_topics']:
659
  context['covered_topics'].append(topic['title'])
660
  context['pending_topics'].remove(topic['title'])