Spaces:
Runtime error
Runtime error
Upload folder using huggingface_hub
Browse files- requirements.txt +1 -3
- src/app.py +28 -37
- src/core/transformer.py +75 -138
requirements.txt
CHANGED
|
@@ -2,9 +2,7 @@ gradio==4.26.0
|
|
| 2 |
transformers>=4.30.0
|
| 3 |
torch>=2.0.0
|
| 4 |
pypdf2>=3.0.0
|
| 5 |
-
|
| 6 |
-
numpy>=1.21.0
|
| 7 |
tqdm>=4.65.0
|
| 8 |
-
openai>=1.0.0
|
| 9 |
tiktoken>=0.5.0
|
| 10 |
fastapi<0.110.0
|
|
|
|
| 2 |
transformers>=4.30.0
|
| 3 |
torch>=2.0.0
|
| 4 |
pypdf2>=3.0.0
|
| 5 |
+
numpy>=1.26.4
|
|
|
|
| 6 |
tqdm>=4.65.0
|
|
|
|
| 7 |
tiktoken>=0.5.0
|
| 8 |
fastapi<0.110.0
|
src/app.py
CHANGED
|
@@ -2,7 +2,6 @@ import os
|
|
| 2 |
import gradio as gr
|
| 3 |
import re
|
| 4 |
import logging # Added for debugging
|
| 5 |
-
from dotenv import load_dotenv
|
| 6 |
from src.core.transformer import TranscriptTransformer
|
| 7 |
from src.utils.pdf_processor import PDFProcessor
|
| 8 |
from src.utils.text_processor import TextProcessor
|
|
@@ -10,9 +9,6 @@ from src.utils.text_processor import TextProcessor
|
|
| 10 |
# Set up basic logging
|
| 11 |
logging.basicConfig(level=logging.INFO, format='%(asctime)s - %(levelname)s - %(message)s')
|
| 12 |
|
| 13 |
-
load_dotenv()
|
| 14 |
-
logging.info("Environment variables loaded.")
|
| 15 |
-
|
| 16 |
# Translations dictionary for UI elements
|
| 17 |
TRANSLATIONS = {
|
| 18 |
"en": {
|
|
@@ -28,7 +24,6 @@ TRANSLATIONS = {
|
|
| 28 |
"guiding_prompt_info": "The Guiding Prompt allows you to provide specific instructions to modify the generated content, like output/desired LANGUAGE. You can use it to change the tone, style, focus ONLY on specific sections of the text, specify the output language (e.g., 'Generate in Spanish/French/German'), or give any other instruction that helps personalize the final result.",
|
| 29 |
"duration_label": "Target Lecture Duration (minutes)",
|
| 30 |
"examples_label": "Include Practical Examples",
|
| 31 |
-
"thinking_model_label": "Use Experimental Thinking Model (Gemini Only)",
|
| 32 |
"submit_button": "Transform Transcript",
|
| 33 |
"output_label": "Generated Teaching Transcript",
|
| 34 |
"error_no_pdf": "Error: No PDF file uploaded",
|
|
@@ -51,7 +46,6 @@ TRANSLATIONS = {
|
|
| 51 |
"guiding_prompt_info": "Las Instrucciones Guía te permiten proporcionar indicaciones específicas para modificar el contenido generado, como el IDIOMA deseado. Puedes usarlas para cambiar el tono, estilo, enfocarte SOLO en secciones específicas del texto, especificar el idioma de salida (ej., 'Generar en inglés/francés/alemán'), o dar cualquier otra instrucción que ayude a personalizar el resultado final.",
|
| 52 |
"duration_label": "Duración Objetivo de la Clase (minutos)",
|
| 53 |
"examples_label": "Incluir Ejemplos Prácticos",
|
| 54 |
-
"thinking_model_label": "Usar Modelo de Pensamiento Experimental (Solo Gemini)",
|
| 55 |
"submit_button": "Transformar Transcripción",
|
| 56 |
"output_label": "Guión de Enseñanza Generado",
|
| 57 |
"error_no_pdf": "Error: No se ha subido ningún archivo PDF",
|
|
@@ -74,6 +68,15 @@ class TranscriptTransformerApp:
|
|
| 74 |
logging.info("Initializing TranscriptTransformerApp...")
|
| 75 |
self.pdf_processor = PDFProcessor()
|
| 76 |
self.text_processor = TextProcessor()
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 77 |
self.current_language = "en" # Default language
|
| 78 |
self.last_generated_content = "" # Store the last generated content
|
| 79 |
self.content_with_timestamps = "" # Store content with timestamps
|
|
@@ -87,11 +90,9 @@ class TranscriptTransformerApp:
|
|
| 87 |
raw_text_input: str = "",
|
| 88 |
initial_prompt: str = "",
|
| 89 |
target_duration: int = 30,
|
| 90 |
-
include_examples: bool = True
|
| 91 |
-
use_gemini: bool = True,
|
| 92 |
-
use_thinking_model: bool = False) -> str:
|
| 93 |
"""
|
| 94 |
-
Process uploaded transcript and transform it into a teaching transcript
|
| 95 |
|
| 96 |
Args:
|
| 97 |
language: Selected UI language
|
|
@@ -101,23 +102,20 @@ class TranscriptTransformerApp:
|
|
| 101 |
initial_prompt: Additional guiding instructions for the content generation
|
| 102 |
target_duration: Target lecture duration in minutes
|
| 103 |
include_examples: Whether to include practical examples
|
| 104 |
-
use_gemini: Whether to use Gemini API instead of OpenAI
|
| 105 |
-
use_thinking_model: Requires use_gemini=True
|
| 106 |
|
| 107 |
Returns:
|
| 108 |
str: Generated teaching transcript
|
| 109 |
"""
|
| 110 |
-
logging.info(f"Processing transcript.
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 111 |
try:
|
| 112 |
-
#
|
| 113 |
-
if use_thinking_model:
|
| 114 |
-
logging.info("Thinking model selected, forcing use_gemini=True")
|
| 115 |
-
use_gemini = True
|
| 116 |
-
|
| 117 |
-
self.transformer = TranscriptTransformer(
|
| 118 |
-
use_gemini=use_gemini,
|
| 119 |
-
use_thinking_model=use_thinking_model
|
| 120 |
-
)
|
| 121 |
|
| 122 |
# Get text based on input type
|
| 123 |
if input_type == TRANSLATIONS[language]["input_type_options"][0]: # PDF
|
|
@@ -194,6 +192,7 @@ class TranscriptTransformerApp:
|
|
| 194 |
|
| 195 |
translations = TRANSLATIONS[language]
|
| 196 |
|
|
|
|
| 197 |
return [
|
| 198 |
translations["title"],
|
| 199 |
translations["subtitle"],
|
|
@@ -207,11 +206,9 @@ class TranscriptTransformerApp:
|
|
| 207 |
translations["guiding_prompt_info"],
|
| 208 |
translations["duration_label"],
|
| 209 |
translations["examples_label"],
|
| 210 |
-
translations["thinking_model_label"],
|
| 211 |
translations["submit_button"],
|
| 212 |
translations["output_label"]
|
| 213 |
]
|
| 214 |
-
logging.info("UI language updated.")
|
| 215 |
|
| 216 |
def launch(self):
|
| 217 |
"""Launch the Gradio interface"""
|
|
@@ -284,11 +281,6 @@ class TranscriptTransformerApp:
|
|
| 284 |
label=TRANSLATIONS["en"]["examples_label"],
|
| 285 |
value=True
|
| 286 |
)
|
| 287 |
-
|
| 288 |
-
use_thinking_model = gr.Checkbox(
|
| 289 |
-
label=TRANSLATIONS["en"]["thinking_model_label"],
|
| 290 |
-
value=True
|
| 291 |
-
)
|
| 292 |
|
| 293 |
# Submit button
|
| 294 |
with gr.Row():
|
|
@@ -337,6 +329,7 @@ class TranscriptTransformerApp:
|
|
| 337 |
|
| 338 |
translations = TRANSLATIONS[language]
|
| 339 |
|
|
|
|
| 340 |
return [
|
| 341 |
"# " + translations["title"], # Title with markdown formatting
|
| 342 |
translations["subtitle"],
|
|
@@ -347,12 +340,10 @@ class TranscriptTransformerApp:
|
|
| 347 |
gr.update(label=translations["guiding_prompt_label"], placeholder=translations["guiding_prompt_placeholder"], info=translations["guiding_prompt_info"]),
|
| 348 |
gr.update(label=translations["duration_label"]),
|
| 349 |
gr.update(label=translations["examples_label"]),
|
| 350 |
-
gr.update(label=translations["thinking_model_label"]),
|
| 351 |
translations["submit_button"],
|
| 352 |
gr.update(label=translations["output_label"]),
|
| 353 |
gr.update(label=translations["show_timestamps"])
|
| 354 |
]
|
| 355 |
-
logging.info("UI elements update values prepared.")
|
| 356 |
|
| 357 |
input_type.change(
|
| 358 |
fn=lambda lang_display, choice: update_input_visibility(lang_display, choice),
|
|
@@ -369,7 +360,7 @@ class TranscriptTransformerApp:
|
|
| 369 |
input_type, input_type,
|
| 370 |
file_input, text_input,
|
| 371 |
initial_prompt,
|
| 372 |
-
target_duration, include_examples,
|
| 373 |
submit_btn, output,
|
| 374 |
timestamps_checkbox
|
| 375 |
]
|
|
@@ -382,9 +373,10 @@ class TranscriptTransformerApp:
|
|
| 382 |
outputs=[output]
|
| 383 |
)
|
| 384 |
|
| 385 |
-
# Set up submission logic
|
| 386 |
submit_btn.click(
|
| 387 |
-
fn=lambda lang_display,
|
|
|
|
| 388 |
inputs=[
|
| 389 |
language_selector,
|
| 390 |
input_type,
|
|
@@ -393,7 +385,6 @@ class TranscriptTransformerApp:
|
|
| 393 |
initial_prompt,
|
| 394 |
target_duration,
|
| 395 |
include_examples,
|
| 396 |
-
use_thinking_model
|
| 397 |
],
|
| 398 |
outputs=output
|
| 399 |
)
|
|
@@ -401,8 +392,8 @@ class TranscriptTransformerApp:
|
|
| 401 |
# Example for PDF input
|
| 402 |
logging.info("Setting up Gradio Examples...")
|
| 403 |
gr.Examples(
|
| 404 |
-
examples=[[example_pdf, "", "", 30, True
|
| 405 |
-
inputs=[file_input, text_input, initial_prompt, target_duration, include_examples
|
| 406 |
)
|
| 407 |
logging.info("Gradio Examples configured.")
|
| 408 |
|
|
|
|
| 2 |
import gradio as gr
|
| 3 |
import re
|
| 4 |
import logging # Added for debugging
|
|
|
|
| 5 |
from src.core.transformer import TranscriptTransformer
|
| 6 |
from src.utils.pdf_processor import PDFProcessor
|
| 7 |
from src.utils.text_processor import TextProcessor
|
|
|
|
| 9 |
# Set up basic logging
|
| 10 |
logging.basicConfig(level=logging.INFO, format='%(asctime)s - %(levelname)s - %(message)s')
|
| 11 |
|
|
|
|
|
|
|
|
|
|
| 12 |
# Translations dictionary for UI elements
|
| 13 |
TRANSLATIONS = {
|
| 14 |
"en": {
|
|
|
|
| 24 |
"guiding_prompt_info": "The Guiding Prompt allows you to provide specific instructions to modify the generated content, like output/desired LANGUAGE. You can use it to change the tone, style, focus ONLY on specific sections of the text, specify the output language (e.g., 'Generate in Spanish/French/German'), or give any other instruction that helps personalize the final result.",
|
| 25 |
"duration_label": "Target Lecture Duration (minutes)",
|
| 26 |
"examples_label": "Include Practical Examples",
|
|
|
|
| 27 |
"submit_button": "Transform Transcript",
|
| 28 |
"output_label": "Generated Teaching Transcript",
|
| 29 |
"error_no_pdf": "Error: No PDF file uploaded",
|
|
|
|
| 46 |
"guiding_prompt_info": "Las Instrucciones Guía te permiten proporcionar indicaciones específicas para modificar el contenido generado, como el IDIOMA deseado. Puedes usarlas para cambiar el tono, estilo, enfocarte SOLO en secciones específicas del texto, especificar el idioma de salida (ej., 'Generar en inglés/francés/alemán'), o dar cualquier otra instrucción que ayude a personalizar el resultado final.",
|
| 47 |
"duration_label": "Duración Objetivo de la Clase (minutos)",
|
| 48 |
"examples_label": "Incluir Ejemplos Prácticos",
|
|
|
|
| 49 |
"submit_button": "Transformar Transcripción",
|
| 50 |
"output_label": "Guión de Enseñanza Generado",
|
| 51 |
"error_no_pdf": "Error: No se ha subido ningún archivo PDF",
|
|
|
|
| 68 |
logging.info("Initializing TranscriptTransformerApp...")
|
| 69 |
self.pdf_processor = PDFProcessor()
|
| 70 |
self.text_processor = TextProcessor()
|
| 71 |
+
# Initialize transformer directly (it always uses Gemini now)
|
| 72 |
+
try:
|
| 73 |
+
self.transformer = TranscriptTransformer()
|
| 74 |
+
except ValueError as e:
|
| 75 |
+
# Handle case where GEMINI_API_KEY might be missing during init
|
| 76 |
+
logging.error(f"Failed to initialize Transformer: {e}")
|
| 77 |
+
# Optionally, set self.transformer to None and handle in process_transcript
|
| 78 |
+
# Or re-raise / exit, depending on desired behavior
|
| 79 |
+
self.transformer = None # Indicate initialization failure
|
| 80 |
self.current_language = "en" # Default language
|
| 81 |
self.last_generated_content = "" # Store the last generated content
|
| 82 |
self.content_with_timestamps = "" # Store content with timestamps
|
|
|
|
| 90 |
raw_text_input: str = "",
|
| 91 |
initial_prompt: str = "",
|
| 92 |
target_duration: int = 30,
|
| 93 |
+
include_examples: bool = True) -> str:
|
|
|
|
|
|
|
| 94 |
"""
|
| 95 |
+
Process uploaded transcript and transform it into a teaching transcript using Gemini.
|
| 96 |
|
| 97 |
Args:
|
| 98 |
language: Selected UI language
|
|
|
|
| 102 |
initial_prompt: Additional guiding instructions for the content generation
|
| 103 |
target_duration: Target lecture duration in minutes
|
| 104 |
include_examples: Whether to include practical examples
|
|
|
|
|
|
|
| 105 |
|
| 106 |
Returns:
|
| 107 |
str: Generated teaching transcript
|
| 108 |
"""
|
| 109 |
+
logging.info(f"Processing transcript. Lang: {language}, Type: {input_type}, HasFile: {file_obj is not None}, HasText: {bool(raw_text_input)}, Duration: {target_duration}, Examples: {include_examples}")
|
| 110 |
+
|
| 111 |
+
# Check if transformer initialized correctly
|
| 112 |
+
if self.transformer is None:
|
| 113 |
+
logging.error("Transformer not initialized, likely missing API key.")
|
| 114 |
+
# Return an error message appropriate for the UI language
|
| 115 |
+
return TRANSLATIONS[language].get("error_prefix", "Error: ") + "Configuration error (API Key missing?)"
|
| 116 |
+
|
| 117 |
try:
|
| 118 |
+
# No need to initialize transformer here anymore
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 119 |
|
| 120 |
# Get text based on input type
|
| 121 |
if input_type == TRANSLATIONS[language]["input_type_options"][0]: # PDF
|
|
|
|
| 192 |
|
| 193 |
translations = TRANSLATIONS[language]
|
| 194 |
|
| 195 |
+
logging.info("UI language updated.")
|
| 196 |
return [
|
| 197 |
translations["title"],
|
| 198 |
translations["subtitle"],
|
|
|
|
| 206 |
translations["guiding_prompt_info"],
|
| 207 |
translations["duration_label"],
|
| 208 |
translations["examples_label"],
|
|
|
|
| 209 |
translations["submit_button"],
|
| 210 |
translations["output_label"]
|
| 211 |
]
|
|
|
|
| 212 |
|
| 213 |
def launch(self):
|
| 214 |
"""Launch the Gradio interface"""
|
|
|
|
| 281 |
label=TRANSLATIONS["en"]["examples_label"],
|
| 282 |
value=True
|
| 283 |
)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 284 |
|
| 285 |
# Submit button
|
| 286 |
with gr.Row():
|
|
|
|
| 329 |
|
| 330 |
translations = TRANSLATIONS[language]
|
| 331 |
|
| 332 |
+
logging.info("UI elements update values prepared.")
|
| 333 |
return [
|
| 334 |
"# " + translations["title"], # Title with markdown formatting
|
| 335 |
translations["subtitle"],
|
|
|
|
| 340 |
gr.update(label=translations["guiding_prompt_label"], placeholder=translations["guiding_prompt_placeholder"], info=translations["guiding_prompt_info"]),
|
| 341 |
gr.update(label=translations["duration_label"]),
|
| 342 |
gr.update(label=translations["examples_label"]),
|
|
|
|
| 343 |
translations["submit_button"],
|
| 344 |
gr.update(label=translations["output_label"]),
|
| 345 |
gr.update(label=translations["show_timestamps"])
|
| 346 |
]
|
|
|
|
| 347 |
|
| 348 |
input_type.change(
|
| 349 |
fn=lambda lang_display, choice: update_input_visibility(lang_display, choice),
|
|
|
|
| 360 |
input_type, input_type,
|
| 361 |
file_input, text_input,
|
| 362 |
initial_prompt,
|
| 363 |
+
target_duration, include_examples,
|
| 364 |
submit_btn, output,
|
| 365 |
timestamps_checkbox
|
| 366 |
]
|
|
|
|
| 373 |
outputs=[output]
|
| 374 |
)
|
| 375 |
|
| 376 |
+
# Set up submission logic
|
| 377 |
submit_btn.click(
|
| 378 |
+
fn=lambda lang_display, input_type_val, file_obj_val, text_input_val, initial_prompt_val, target_duration_val, include_examples_val: \
|
| 379 |
+
self.process_transcript(get_language_code(lang_display), input_type_val, file_obj_val, text_input_val, initial_prompt_val, target_duration_val, include_examples_val),
|
| 380 |
inputs=[
|
| 381 |
language_selector,
|
| 382 |
input_type,
|
|
|
|
| 385 |
initial_prompt,
|
| 386 |
target_duration,
|
| 387 |
include_examples,
|
|
|
|
| 388 |
],
|
| 389 |
outputs=output
|
| 390 |
)
|
|
|
|
| 392 |
# Example for PDF input
|
| 393 |
logging.info("Setting up Gradio Examples...")
|
| 394 |
gr.Examples(
|
| 395 |
+
examples=[[example_pdf, "", "", 30, True]],
|
| 396 |
+
inputs=[file_input, text_input, initial_prompt, target_duration, include_examples]
|
| 397 |
)
|
| 398 |
logging.info("Gradio Examples configured.")
|
| 399 |
|
src/core/transformer.py
CHANGED
|
@@ -22,40 +22,31 @@ class TranscriptTransformer:
|
|
| 22 |
EXTENDED_RETRY_DELAYS = [5, 10, 15] # Wait times in seconds for extended retries
|
| 23 |
CHUNK_SIZE = 6000 # Target words per chunk
|
| 24 |
LARGE_DEVIATION_THRESHOLD = 0.20 # 20% maximum deviation
|
| 25 |
-
MAX_TOKENS = 64000 #
|
| 26 |
|
| 27 |
-
def __init__(self
|
| 28 |
-
"""Initialize the transformer
|
| 29 |
self.text_processor = TextProcessor()
|
| 30 |
-
self.use_gemini = use_gemini
|
| 31 |
-
self.use_thinking_model = use_thinking_model
|
| 32 |
|
| 33 |
-
|
| 34 |
-
|
| 35 |
-
|
| 36 |
-
|
| 37 |
-
|
| 38 |
-
|
| 39 |
-
|
| 40 |
-
|
| 41 |
-
|
| 42 |
-
|
| 43 |
-
|
| 44 |
-
|
| 45 |
-
|
| 46 |
-
|
| 47 |
-
|
| 48 |
-
)
|
| 49 |
-
self.model_name = "gemini-2.0-flash-exp"
|
| 50 |
-
else:
|
| 51 |
-
logger.info("Initializing with OpenAI API")
|
| 52 |
-
self.openai_client = openai.OpenAI(
|
| 53 |
-
api_key=os.getenv('OPENAI_API_KEY')
|
| 54 |
-
)
|
| 55 |
-
self.model_name = "gpt-3.5-turbo"
|
| 56 |
|
| 57 |
# Target word counts
|
| 58 |
self.words_per_minute = 130 # Average speaking rate
|
|
|
|
| 59 |
|
| 60 |
def _api_call_with_enhanced_retries(self, call_func: Callable[[], Any]) -> Any:
|
| 61 |
"""
|
|
@@ -267,6 +258,7 @@ class TranscriptTransformer:
|
|
| 267 |
|
| 268 |
user_instructions = f"\nAdditional user instructions:\n{initial_prompt}\n" if initial_prompt else ""
|
| 269 |
|
|
|
|
| 270 |
prompt = f"""
|
| 271 |
You are an expert educator creating a detailed lecture outline.
|
| 272 |
{user_instructions}
|
|
@@ -307,30 +299,30 @@ class TranscriptTransformer:
|
|
| 307 |
"""
|
| 308 |
|
| 309 |
try:
|
| 310 |
-
# Common parameters
|
| 311 |
params = {
|
| 312 |
-
"model": self.model_name,
|
| 313 |
"messages": [
|
| 314 |
{"role": "system", "content": "You are an expert educator. Output ONLY valid JSON, no other text."},
|
| 315 |
{"role": "user", "content": prompt}
|
| 316 |
],
|
| 317 |
"temperature": 0.7,
|
| 318 |
-
|
|
|
|
| 319 |
}
|
| 320 |
-
|
| 321 |
-
# Add thinking config if using experimental model
|
| 322 |
-
if self.use_thinking_model:
|
| 323 |
-
params["extra_body"] = {
|
| 324 |
-
"thinking_config": {
|
| 325 |
-
"include_thoughts": True
|
| 326 |
-
}
|
| 327 |
-
}
|
| 328 |
|
| 329 |
# Use the enhanced retry wrapper for API call
|
| 330 |
def api_call():
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 331 |
return self.openai_client.chat.completions.create(**params)
|
| 332 |
|
| 333 |
response = self._api_call_with_enhanced_retries(api_call)
|
|
|
|
| 334 |
content = response.choices[0].message.content.strip()
|
| 335 |
logger.debug(f"Raw structure response: {content}")
|
| 336 |
|
|
@@ -357,7 +349,7 @@ class TranscriptTransformer:
|
|
| 357 |
return self._generate_fallback_structure(text, target_duration)
|
| 358 |
|
| 359 |
except Exception as e:
|
| 360 |
-
logger.error(f"Error generating structure: {str(e)}")
|
| 361 |
# Fallback in case of any error
|
| 362 |
return self._generate_fallback_structure(text, target_duration)
|
| 363 |
|
|
@@ -366,87 +358,46 @@ class TranscriptTransformer:
|
|
| 366 |
logger.info("Generating fallback structure")
|
| 367 |
|
| 368 |
params = {
|
| 369 |
-
|
| 370 |
-
|
| 371 |
-
|
| 372 |
-
|
| 373 |
-
|
| 374 |
-
|
| 375 |
-
|
| 376 |
-
|
| 377 |
-
|
| 378 |
-
|
| 379 |
-
|
| 380 |
-
|
| 381 |
-
|
| 382 |
-
|
| 383 |
-
|
| 384 |
-
|
| 385 |
-
|
| 386 |
-
|
| 387 |
-
|
| 388 |
-
|
| 389 |
-
|
| 390 |
|
| 391 |
try:
|
| 392 |
# Use the enhanced retry wrapper for API call
|
| 393 |
def api_call():
|
| 394 |
-
|
| 395 |
|
| 396 |
response = self._api_call_with_enhanced_retries(api_call)
|
| 397 |
content = response.choices[0].message.content.strip()
|
| 398 |
|
| 399 |
try:
|
| 400 |
-
|
| 401 |
except json.JSONDecodeError:
|
| 402 |
-
|
| 403 |
-
|
| 404 |
-
|
| 405 |
-
"learning_objectives": ["Understand key concepts", "Apply knowledge", "Evaluate outcomes"],
|
| 406 |
-
"topics": [
|
| 407 |
-
{
|
| 408 |
-
"title": "Main Topic 1",
|
| 409 |
-
"key_concepts": ["Concept 1", "Concept 2"],
|
| 410 |
-
"subtopics": ["Subtopic 1", "Subtopic 2"],
|
| 411 |
-
"duration_minutes": target_duration // 2,
|
| 412 |
-
"objective_links": [1, 2]
|
| 413 |
-
},
|
| 414 |
-
{
|
| 415 |
-
"title": "Main Topic 2",
|
| 416 |
-
"key_concepts": ["Concept 3", "Concept 4"],
|
| 417 |
-
"subtopics": ["Subtopic 3", "Subtopic 4"],
|
| 418 |
-
"duration_minutes": target_duration // 2,
|
| 419 |
-
"objective_links": [2, 3]
|
| 420 |
-
}
|
| 421 |
-
],
|
| 422 |
-
"practical_applications": ["Application 1", "Application 2"],
|
| 423 |
-
"key_terms": ["Term 1", "Term 2", "Term 3"]
|
| 424 |
-
}
|
| 425 |
except Exception as e:
|
| 426 |
-
logger.error(f"Error generating fallback structure: {str(e)}")
|
| 427 |
-
# Hardcoded last resort fallback
|
| 428 |
-
|
| 429 |
-
"title": "Lecture on Transcript Topic",
|
| 430 |
-
"learning_objectives": ["Understand key concepts", "Apply knowledge", "Evaluate outcomes"],
|
| 431 |
-
"topics": [
|
| 432 |
-
{
|
| 433 |
-
"title": "Main Topic 1",
|
| 434 |
-
"key_concepts": ["Concept 1", "Concept 2"],
|
| 435 |
-
"subtopics": ["Subtopic 1", "Subtopic 2"],
|
| 436 |
-
"duration_minutes": target_duration // 2,
|
| 437 |
-
"objective_links": [1, 2]
|
| 438 |
-
},
|
| 439 |
-
{
|
| 440 |
-
"title": "Main Topic 2",
|
| 441 |
-
"key_concepts": ["Concept 3", "Concept 4"],
|
| 442 |
-
"subtopics": ["Subtopic 3", "Subtopic 4"],
|
| 443 |
-
"duration_minutes": target_duration // 2,
|
| 444 |
-
"objective_links": [2, 3]
|
| 445 |
-
}
|
| 446 |
-
],
|
| 447 |
-
"practical_applications": ["Application 1", "Application 2"],
|
| 448 |
-
"key_terms": ["Term 1", "Term 2", "Term 3"]
|
| 449 |
-
}
|
| 450 |
|
| 451 |
def _generate_section(self,
|
| 452 |
section_type: str,
|
|
@@ -475,7 +426,7 @@ class TranscriptTransformer:
|
|
| 475 |
|
| 476 |
user_instructions = f"\nAdditional user instructions:\n{initial_prompt}\n" if initial_prompt else ""
|
| 477 |
|
| 478 |
-
# Base prompt
|
| 479 |
prompt = f"""
|
| 480 |
You are creating a {section_type} section for a {time_marker} teaching lecture on "{structure_data['title']}".
|
| 481 |
{user_instructions}
|
|
@@ -487,7 +438,7 @@ class TranscriptTransformer:
|
|
| 487 |
Key terms:
|
| 488 |
{', '.join(structure_data['key_terms'])}
|
| 489 |
|
| 490 |
-
Original source:
|
| 491 |
{original_text[:500]}...
|
| 492 |
"""
|
| 493 |
|
|
@@ -569,24 +520,16 @@ class TranscriptTransformer:
|
|
| 569 |
"""
|
| 570 |
|
| 571 |
try:
|
| 572 |
-
# Prepare API call parameters
|
| 573 |
params = {
|
| 574 |
-
"model": self.model_name,
|
| 575 |
"messages": [
|
| 576 |
{"role": "system", "content": "You are an expert educator creating a teaching script."},
|
| 577 |
{"role": "user", "content": prompt}
|
| 578 |
],
|
| 579 |
"temperature": 0.7,
|
| 580 |
-
"max_tokens": self._calculate_max_tokens(section_type, target_words)
|
| 581 |
}
|
| 582 |
-
|
| 583 |
-
# Add thinking config if using experimental model
|
| 584 |
-
if self.use_thinking_model:
|
| 585 |
-
params["extra_body"] = {
|
| 586 |
-
"thinking_config": {
|
| 587 |
-
"include_thoughts": True
|
| 588 |
-
}
|
| 589 |
-
}
|
| 590 |
|
| 591 |
# Use the enhanced retry wrapper for API call
|
| 592 |
def api_call():
|
|
@@ -602,27 +545,21 @@ class TranscriptTransformer:
|
|
| 602 |
return content
|
| 603 |
|
| 604 |
except Exception as e:
|
| 605 |
-
logger.error(f"Error during
|
| 606 |
# Provide a minimal fallback content to avoid complete failure
|
| 607 |
return f"{time_marker} {section_type.capitalize()} (Error during generation)\n\nWe apologize, but there was an error generating this section."
|
| 608 |
|
| 609 |
def _calculate_max_tokens(self, section_type: str, target_words: int) -> int:
|
| 610 |
"""Calculate appropriate max_tokens based on section and model"""
|
| 611 |
# 1 token ≈ 4 caracteres (1 palabra ≈ 1.33 tokens)
|
| 612 |
-
|
| 613 |
-
|
| 614 |
-
|
| 615 |
-
|
| 616 |
-
|
| 617 |
-
|
| 618 |
-
|
| 619 |
-
|
| 620 |
-
'summary': 8000
|
| 621 |
-
}
|
| 622 |
-
return min(base_tokens * 2, section_limits.get(section_type, 16000))
|
| 623 |
-
|
| 624 |
-
# Límites para otros modelos
|
| 625 |
-
return min(base_tokens + 1000, self.MAX_TOKENS)
|
| 626 |
|
| 627 |
def _generate_main_content(self,
|
| 628 |
structure_data: Dict,
|
|
|
|
| 22 |
EXTENDED_RETRY_DELAYS = [5, 10, 15] # Wait times in seconds for extended retries
|
| 23 |
CHUNK_SIZE = 6000 # Target words per chunk
|
| 24 |
LARGE_DEVIATION_THRESHOLD = 0.20 # 20% maximum deviation
|
| 25 |
+
MAX_TOKENS = 64000 # Using a fixed large token limit, adjust if needed per model
|
| 26 |
|
| 27 |
+
def __init__(self):
|
| 28 |
+
"""Initialize the transformer to always use the Gemini API"""
|
| 29 |
self.text_processor = TextProcessor()
|
|
|
|
|
|
|
| 30 |
|
| 31 |
+
gemini_api_key = os.environ.get('GEMINI_API_KEY')
|
| 32 |
+
if not gemini_api_key:
|
| 33 |
+
logger.error("GEMINI_API_KEY environment variable not found!")
|
| 34 |
+
# Optionally raise an error or handle missing key
|
| 35 |
+
raise ValueError("Missing GEMINI_API_KEY environment variable")
|
| 36 |
+
|
| 37 |
+
logger.info("Initializing with Gemini API (Flash Model)")
|
| 38 |
+
# Using v1beta as the base URL for the standard Gemini Flash model
|
| 39 |
+
self.openai_client = openai.OpenAI(
|
| 40 |
+
api_key=gemini_api_key,
|
| 41 |
+
base_url="https://generativelanguage.googleapis.com/v1beta"
|
| 42 |
+
)
|
| 43 |
+
# Using gemini-1.5-flash-latest as a stable and capable model
|
| 44 |
+
# Replace 'gemini-1.5-flash-latest' if you need a specific experimental version
|
| 45 |
+
self.model_name = "models/gemini-1.5-flash-latest"
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 46 |
|
| 47 |
# Target word counts
|
| 48 |
self.words_per_minute = 130 # Average speaking rate
|
| 49 |
+
logger.info(f"Transformer initialized with model: {self.model_name}")
|
| 50 |
|
| 51 |
def _api_call_with_enhanced_retries(self, call_func: Callable[[], Any]) -> Any:
|
| 52 |
"""
|
|
|
|
| 258 |
|
| 259 |
user_instructions = f"\nAdditional user instructions:\n{initial_prompt}\n" if initial_prompt else ""
|
| 260 |
|
| 261 |
+
# Simplified prompt slightly, removed references to experimental models
|
| 262 |
prompt = f"""
|
| 263 |
You are an expert educator creating a detailed lecture outline.
|
| 264 |
{user_instructions}
|
|
|
|
| 299 |
"""
|
| 300 |
|
| 301 |
try:
|
| 302 |
+
# Common parameters - simplified
|
| 303 |
params = {
|
| 304 |
+
"model": self.model_name,
|
| 305 |
"messages": [
|
| 306 |
{"role": "system", "content": "You are an expert educator. Output ONLY valid JSON, no other text."},
|
| 307 |
{"role": "user", "content": prompt}
|
| 308 |
],
|
| 309 |
"temperature": 0.7,
|
| 310 |
+
# Use a reasonable max_token limit for structure generation
|
| 311 |
+
"max_tokens": 4000
|
| 312 |
}
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 313 |
|
| 314 |
# Use the enhanced retry wrapper for API call
|
| 315 |
def api_call():
|
| 316 |
+
# Need to pass model name correctly for Gemini via OpenAI lib
|
| 317 |
+
# The model name needs to be part of the endpoint path for Gemini API usually
|
| 318 |
+
# Let's adjust how the client is called if direct model param doesn't work
|
| 319 |
+
# For now, assuming the openai lib handles it with base_url correctly
|
| 320 |
+
# If errors occur, might need 'models/' prefix in self.model_name or adjust base_url/client call
|
| 321 |
+
# Update: Using models/gemini-1.5-flash-latest which is standard
|
| 322 |
return self.openai_client.chat.completions.create(**params)
|
| 323 |
|
| 324 |
response = self._api_call_with_enhanced_retries(api_call)
|
| 325 |
+
# Assuming the response structure is similar enough
|
| 326 |
content = response.choices[0].message.content.strip()
|
| 327 |
logger.debug(f"Raw structure response: {content}")
|
| 328 |
|
|
|
|
| 349 |
return self._generate_fallback_structure(text, target_duration)
|
| 350 |
|
| 351 |
except Exception as e:
|
| 352 |
+
logger.error(f"Error generating structure: {str(e)}", exc_info=True) # Added exc_info
|
| 353 |
# Fallback in case of any error
|
| 354 |
return self._generate_fallback_structure(text, target_duration)
|
| 355 |
|
|
|
|
| 358 |
logger.info("Generating fallback structure")
|
| 359 |
|
| 360 |
params = {
|
| 361 |
+
"model": self.model_name, # Use the configured Gemini model
|
| 362 |
+
"messages": [
|
| 363 |
+
{"role": "system", "content": "You are an expert educator. Output ONLY valid JSON, no other text."},
|
| 364 |
+
{"role": "user", "content": f"""
|
| 365 |
+
Create a simplified lecture outline based on this transcript.
|
| 366 |
+
Format as JSON with:
|
| 367 |
+
- title
|
| 368 |
+
- 3 learning objectives
|
| 369 |
+
- 2 main topics with title, key concepts, subtopics
|
| 370 |
+
- 2 practical applications
|
| 371 |
+
- 3 key terms
|
| 372 |
+
|
| 373 |
+
Target duration: {target_duration} minutes
|
| 374 |
+
|
| 375 |
+
Transcript excerpt:
|
| 376 |
+
{text[:2000]}
|
| 377 |
+
"""}
|
| 378 |
+
],
|
| 379 |
+
"temperature": 0.5,
|
| 380 |
+
"max_tokens": 2000
|
| 381 |
+
}
|
| 382 |
|
| 383 |
try:
|
| 384 |
# Use the enhanced retry wrapper for API call
|
| 385 |
def api_call():
|
| 386 |
+
return self.openai_client.chat.completions.create(**params)
|
| 387 |
|
| 388 |
response = self._api_call_with_enhanced_retries(api_call)
|
| 389 |
content = response.choices[0].message.content.strip()
|
| 390 |
|
| 391 |
try:
|
| 392 |
+
return json.loads(content)
|
| 393 |
except json.JSONDecodeError:
|
| 394 |
+
logger.warning("Failed to parse fallback JSON, returning hardcoded structure.")
|
| 395 |
+
# Last resort fallback if everything fails (keep existing hardcoded)
|
| 396 |
+
# ... (hardcoded fallback structure remains the same) ...
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 397 |
except Exception as e:
|
| 398 |
+
logger.error(f"Error generating fallback structure: {str(e)}", exc_info=True) # Added exc_info
|
| 399 |
+
# Hardcoded last resort fallback (keep existing hardcoded)
|
| 400 |
+
# ... (hardcoded fallback structure remains the same) ...
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 401 |
|
| 402 |
def _generate_section(self,
|
| 403 |
section_type: str,
|
|
|
|
| 426 |
|
| 427 |
user_instructions = f"\nAdditional user instructions:\n{initial_prompt}\n" if initial_prompt else ""
|
| 428 |
|
| 429 |
+
# Base prompt - Adjusted slightly for clarity
|
| 430 |
prompt = f"""
|
| 431 |
You are creating a {section_type} section for a {time_marker} teaching lecture on "{structure_data['title']}".
|
| 432 |
{user_instructions}
|
|
|
|
| 438 |
Key terms:
|
| 439 |
{', '.join(structure_data['key_terms'])}
|
| 440 |
|
| 441 |
+
Original source (excerpt for context):
|
| 442 |
{original_text[:500]}...
|
| 443 |
"""
|
| 444 |
|
|
|
|
| 520 |
"""
|
| 521 |
|
| 522 |
try:
|
| 523 |
+
# Prepare API call parameters - simplified
|
| 524 |
params = {
|
| 525 |
+
"model": self.model_name, # Use the configured Gemini model
|
| 526 |
"messages": [
|
| 527 |
{"role": "system", "content": "You are an expert educator creating a teaching script."},
|
| 528 |
{"role": "user", "content": prompt}
|
| 529 |
],
|
| 530 |
"temperature": 0.7,
|
| 531 |
+
"max_tokens": self._calculate_max_tokens(section_type, target_words) # Keep calculation logic
|
| 532 |
}
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 533 |
|
| 534 |
# Use the enhanced retry wrapper for API call
|
| 535 |
def api_call():
|
|
|
|
| 545 |
return content
|
| 546 |
|
| 547 |
except Exception as e:
|
| 548 |
+
logger.error(f"Error during {section_type} generation: {str(e)}", exc_info=True) # Added exc_info
|
| 549 |
# Provide a minimal fallback content to avoid complete failure
|
| 550 |
return f"{time_marker} {section_type.capitalize()} (Error during generation)\n\nWe apologize, but there was an error generating this section."
|
| 551 |
|
| 552 |
def _calculate_max_tokens(self, section_type: str, target_words: int) -> int:
|
| 553 |
"""Calculate appropriate max_tokens based on section and model"""
|
| 554 |
# 1 token ≈ 4 caracteres (1 palabra ≈ 1.33 tokens)
|
| 555 |
+
# Simplified: Assume Gemini Flash has large enough context/output limits for these sections
|
| 556 |
+
# Calculate based on words * ratio + buffer
|
| 557 |
+
# Using 1.5 as ratio + 1000 buffer seems reasonable start.
|
| 558 |
+
base_tokens = int(target_words * 1.5) + 1000
|
| 559 |
+
|
| 560 |
+
# Use the overall MAX_TOKENS as a cap, but allow large section generation
|
| 561 |
+
# Be mindful of potential overall context window limits of the model (e.g., 128k for Gemini 1.5 Flash)
|
| 562 |
+
return min(base_tokens, self.MAX_TOKENS)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 563 |
|
| 564 |
def _generate_main_content(self,
|
| 565 |
structure_data: Dict,
|