Spaces:

rogeliorichman
/

AI_Agent_Script_Builder

Sleeping

App Files Community

rogeliorichman committed on Apr 8, 2025

Commit

ee499d1

verified ·

1 Parent(s): d7aef5b

Upload folder using huggingface_hub

Browse files

Files changed (2) hide show

requirements.txt +3 -2
src/app.py +70 -13

requirements.txt CHANGED Viewed

@@ -1,4 +1,4 @@
-gradio>=4.0.0
 transformers>=4.30.0
 torch>=2.0.0
 pypdf2>=3.0.0
@@ -6,4 +6,5 @@ python-dotenv>=0.19.0
 numpy>=1.21.0
 tqdm>=4.65.0
 openai>=1.0.0
-tiktoken>=0.5.0

+gradio==4.26.0
 transformers>=4.30.0
 torch>=2.0.0
 pypdf2>=3.0.0
 numpy>=1.21.0
 tqdm>=4.65.0
 openai>=1.0.0
+tiktoken>=0.5.0
+fastapi<0.110.0

src/app.py CHANGED Viewed

@@ -1,12 +1,17 @@
 import os
 import gradio as gr
 import re
 from dotenv import load_dotenv
 from src.core.transformer import TranscriptTransformer
 from src.utils.pdf_processor import PDFProcessor
 from src.utils.text_processor import TextProcessor
 load_dotenv()
 # Translations dictionary for UI elements
 TRANSLATIONS = {
@@ -23,6 +28,7 @@ TRANSLATIONS = {
         "guiding_prompt_info": "The Guiding Prompt allows you to provide specific instructions to modify the generated content, like output/desired LANGUAGE. You can use it to change the tone, style, focus ONLY on specific sections of the text, specify the output language (e.g., 'Generate in Spanish/French/German'), or give any other instruction that helps personalize the final result.",
         "duration_label": "Target Lecture Duration (minutes)",
         "examples_label": "Include Practical Examples",
         "submit_button": "Transform Transcript",
         "output_label": "Generated Teaching Transcript",
         "error_no_pdf": "Error: No PDF file uploaded",
@@ -45,6 +51,7 @@ TRANSLATIONS = {
         "guiding_prompt_info": "Las Instrucciones Guía te permiten proporcionar indicaciones específicas para modificar el contenido generado, como el IDIOMA deseado. Puedes usarlas para cambiar el tono, estilo, enfocarte SOLO en secciones específicas del texto, especificar el idioma de salida (ej., 'Generar en inglés/francés/alemán'), o dar cualquier otra instrucción que ayude a personalizar el resultado final.",
         "duration_label": "Duración Objetivo de la Clase (minutos)",
         "examples_label": "Incluir Ejemplos Prácticos",
         "submit_button": "Transformar Transcripción",
         "output_label": "Guión de Enseñanza Generado",
         "error_no_pdf": "Error: No se ha subido ningún archivo PDF",
@@ -64,12 +71,14 @@ LANGUAGE_PROMPTS = {
 class TranscriptTransformerApp:
     def __init__(self):
         self.pdf_processor = PDFProcessor()
         self.text_processor = TextProcessor()
         self.current_language = "en"  # Default language
         self.last_generated_content = ""  # Store the last generated content
         self.content_with_timestamps = ""  # Store content with timestamps
         self.content_without_timestamps = ""  # Store content without timestamps
     def process_transcript(self,
                            language: str,
@@ -78,7 +87,9 @@ class TranscriptTransformerApp:
                            raw_text_input: str = "",
                            initial_prompt: str = "",
                            target_duration: int = 30,
-                           include_examples: bool = True) -> str:
         """
         Process uploaded transcript and transform it into a teaching transcript
@@ -90,12 +101,23 @@ class TranscriptTransformerApp:
             initial_prompt: Additional guiding instructions for the content generation
             target_duration: Target lecture duration in minutes
             include_examples: Whether to include practical examples
         Returns:
             str: Generated teaching transcript
         """
         try:
-            self.transformer = TranscriptTransformer()
             # Get text based on input type
             if input_type == TRANSLATIONS[language]["input_type_options"][0]:  # PDF
@@ -131,32 +153,43 @@ class TranscriptTransformerApp:
             # Store the generated content
             self.content_with_timestamps = lecture_transcript
             # Create a version without timestamps
             self.content_without_timestamps = self.remove_timestamps(lecture_transcript)
             # Default: show content with timestamps
             self.last_generated_content = lecture_transcript
             return lecture_transcript
         except Exception as e:
             return f"{TRANSLATIONS[language]['error_prefix']}{str(e)}"
     def remove_timestamps(self, text):
         """Remove all timestamps (e.g., [00:00]) from the text"""
         # Regex to match the timestamp pattern [MM:SS] or [HH:MM:SS]
-        return re.sub(r'\[\d{1,2}:\d{2}(:\d{2})?\]', '', text)
     def toggle_timestamps(self, show_timestamps):
         """Toggle visibility of timestamps in output"""
         if show_timestamps:
             return self.content_with_timestamps
         else:
             return self.content_without_timestamps
     def update_ui_language(self, language):
         """Update UI elements based on selected language"""
         self.current_language = language
         translations = TRANSLATIONS[language]
@@ -174,14 +207,18 @@ class TranscriptTransformerApp:
             translations["guiding_prompt_info"],
             translations["duration_label"],
             translations["examples_label"],
             translations["submit_button"],
             translations["output_label"]
         ]
     def launch(self):
         """Launch the Gradio interface"""
         # Get the path to the example PDF
         example_pdf = os.path.join(os.path.dirname(os.path.dirname(__file__)), "data", "sample2.pdf")
         with gr.Blocks(title=TRANSLATIONS["en"]["title"]) as interface:
             # Header with title and language selector side by side
@@ -235,18 +272,23 @@ class TranscriptTransformerApp:
             # Settings row
             with gr.Row():
-                target_duration = gr.Slider(
                     label=TRANSLATIONS["en"]["duration_label"],
-                    minimum=5,
-                    maximum=180,
                     value=30,
-                    step=5
                 )
                 include_examples = gr.Checkbox(
                     label=TRANSLATIONS["en"]["examples_label"],
                     value=True
                 )
             # Submit button
             with gr.Row():
@@ -282,10 +324,14 @@ class TranscriptTransformerApp:
             # Get language code from display value
             def get_language_code(language_display):
-                return lang_map.get(language_display, "en")
             # Update UI elements when language changes
             def update_ui_with_display(language_display):
                 language = get_language_code(language_display)
                 self.current_language = language
@@ -301,10 +347,12 @@ class TranscriptTransformerApp:
                     gr.update(label=translations["guiding_prompt_label"], placeholder=translations["guiding_prompt_placeholder"], info=translations["guiding_prompt_info"]),
                     gr.update(label=translations["duration_label"]),
                     gr.update(label=translations["examples_label"]),
                     translations["submit_button"],
                     gr.update(label=translations["output_label"]),
                     gr.update(label=translations["show_timestamps"])
                 ]
             input_type.change(
                 fn=lambda lang_display, choice: update_input_visibility(lang_display, choice),
@@ -321,7 +369,7 @@ class TranscriptTransformerApp:
                     input_type, input_type,
                     file_input, text_input,
                     initial_prompt,
-                    target_duration, include_examples,
                     submit_btn, output,
                     timestamps_checkbox
                 ]
@@ -344,19 +392,28 @@ class TranscriptTransformerApp:
                     text_input,
                     initial_prompt,
                     target_duration,
-                    include_examples
                 ],
                 outputs=output
             )
             # Example for PDF input
             gr.Examples(
-                examples=[[example_pdf, "", "", 30, True]],
-                inputs=[file_input, text_input, initial_prompt, target_duration, include_examples]
             )
         interface.launch(share=True)
 if __name__ == "__main__":
     app = TranscriptTransformerApp()
-    app.launch()

 import os
 import gradio as gr
 import re
+import logging # Added for debugging
 from dotenv import load_dotenv
 from src.core.transformer import TranscriptTransformer
 from src.utils.pdf_processor import PDFProcessor
 from src.utils.text_processor import TextProcessor
+# Set up basic logging
+logging.basicConfig(level=logging.INFO, format='%(asctime)s - %(levelname)s - %(message)s')
 load_dotenv()
+logging.info("Environment variables loaded.")
 # Translations dictionary for UI elements
 TRANSLATIONS = {
         "guiding_prompt_info": "The Guiding Prompt allows you to provide specific instructions to modify the generated content, like output/desired LANGUAGE. You can use it to change the tone, style, focus ONLY on specific sections of the text, specify the output language (e.g., 'Generate in Spanish/French/German'), or give any other instruction that helps personalize the final result.",
         "duration_label": "Target Lecture Duration (minutes)",
         "examples_label": "Include Practical Examples",
+        "thinking_model_label": "Use Experimental Thinking Model (Gemini Only)",
         "submit_button": "Transform Transcript",
         "output_label": "Generated Teaching Transcript",
         "error_no_pdf": "Error: No PDF file uploaded",
         "guiding_prompt_info": "Las Instrucciones Guía te permiten proporcionar indicaciones específicas para modificar el contenido generado, como el IDIOMA deseado. Puedes usarlas para cambiar el tono, estilo, enfocarte SOLO en secciones específicas del texto, especificar el idioma de salida (ej., 'Generar en inglés/francés/alemán'), o dar cualquier otra instrucción que ayude a personalizar el resultado final.",
         "duration_label": "Duración Objetivo de la Clase (minutos)",
         "examples_label": "Incluir Ejemplos Prácticos",
+        "thinking_model_label": "Usar Modelo de Pensamiento Experimental (Solo Gemini)",
         "submit_button": "Transformar Transcripción",
         "output_label": "Guión de Enseñanza Generado",
         "error_no_pdf": "Error: No se ha subido ningún archivo PDF",
 class TranscriptTransformerApp:
     def __init__(self):
+        logging.info("Initializing TranscriptTransformerApp...")
         self.pdf_processor = PDFProcessor()
         self.text_processor = TextProcessor()
         self.current_language = "en"  # Default language
         self.last_generated_content = ""  # Store the last generated content
         self.content_with_timestamps = ""  # Store content with timestamps
         self.content_without_timestamps = ""  # Store content without timestamps
+        logging.info("TranscriptTransformerApp initialized.")
     def process_transcript(self,
                            language: str,
                            raw_text_input: str = "",
                            initial_prompt: str = "",
                            target_duration: int = 30,
+                           include_examples: bool = True,
+                           use_gemini: bool = True,
+                           use_thinking_model: bool = False) -> str:
         """
         Process uploaded transcript and transform it into a teaching transcript
             initial_prompt: Additional guiding instructions for the content generation
             target_duration: Target lecture duration in minutes
             include_examples: Whether to include practical examples
+            use_gemini: Whether to use Gemini API instead of OpenAI
+            use_thinking_model: Requires use_gemini=True
         Returns:
             str: Generated teaching transcript
         """
+        logging.info(f"Processing transcript. Language: {language}, InputType: {input_type}, HasFile: {file_obj is not None}, HasText: {bool(raw_text_input)}, Duration: {target_duration}, Examples: {include_examples}, Gemini: {use_gemini}, ThinkingModel: {use_thinking_model}")
         try:
+            # Force enable Gemini if thinking model is selected
+            if use_thinking_model:
+                logging.info("Thinking model selected, forcing use_gemini=True")
+                use_gemini = True
+            self.transformer = TranscriptTransformer(
+                use_gemini=use_gemini,
+                use_thinking_model=use_thinking_model
+            )
             # Get text based on input type
             if input_type == TRANSLATIONS[language]["input_type_options"][0]:  # PDF
             # Store the generated content
             self.content_with_timestamps = lecture_transcript
+            logging.info("Generated content stored (with timestamps).")
             # Create a version without timestamps
             self.content_without_timestamps = self.remove_timestamps(lecture_transcript)
+            logging.info("Generated content stored (without timestamps).")
             # Default: show content with timestamps
             self.last_generated_content = lecture_transcript
+            logging.info("Transcript processing successful.")
             return lecture_transcript
         except Exception as e:
+            logging.error(f"Error processing transcript: {e}", exc_info=True) # Log exception info
             return f"{TRANSLATIONS[language]['error_prefix']}{str(e)}"
     def remove_timestamps(self, text):
         """Remove all timestamps (e.g., [00:00]) from the text"""
+        logging.info("Removing timestamps...")
         # Regex to match the timestamp pattern [MM:SS] or [HH:MM:SS]
+        result = re.sub(r'\[\d{1,2}:\d{2}(:\d{2})?\]', '', text)
+        logging.info("Timestamps removed.")
+        return result
     def toggle_timestamps(self, show_timestamps):
         """Toggle visibility of timestamps in output"""
+        logging.info(f"Toggling timestamps visibility. Show: {show_timestamps}")
         if show_timestamps:
+            logging.info("Returning content WITH timestamps.")
             return self.content_with_timestamps
         else:
+            logging.info("Returning content WITHOUT timestamps.")
             return self.content_without_timestamps
     def update_ui_language(self, language):
         """Update UI elements based on selected language"""
+        logging.info(f"Updating UI language to: {language}")
         self.current_language = language
         translations = TRANSLATIONS[language]
             translations["guiding_prompt_info"],
             translations["duration_label"],
             translations["examples_label"],
+            translations["thinking_model_label"],
             translations["submit_button"],
             translations["output_label"]
         ]
+        logging.info("UI language updated.")
     def launch(self):
         """Launch the Gradio interface"""
+        logging.info("Configuring Gradio interface...")
         # Get the path to the example PDF
         example_pdf = os.path.join(os.path.dirname(os.path.dirname(__file__)), "data", "sample2.pdf")
+        logging.info(f"Example PDF path: {example_pdf}")
         with gr.Blocks(title=TRANSLATIONS["en"]["title"]) as interface:
             # Header with title and language selector side by side
             # Settings row
             with gr.Row():
+                target_duration = gr.Number(
                     label=TRANSLATIONS["en"]["duration_label"],
                     value=30,
+                    minimum=2,
+                    maximum=60,
+                    step=1
                 )
                 include_examples = gr.Checkbox(
                     label=TRANSLATIONS["en"]["examples_label"],
                     value=True
                 )
+                use_thinking_model = gr.Checkbox(
+                    label=TRANSLATIONS["en"]["thinking_model_label"],
+                    value=True
+                )
             # Submit button
             with gr.Row():
             # Get language code from display value
             def get_language_code(language_display):
+                logging.info(f"Getting language code for display value: {language_display}")
+                code = lang_map.get(language_display, "en")
+                logging.info(f"Language code: {code}")
+                return code
             # Update UI elements when language changes
             def update_ui_with_display(language_display):
+                logging.info(f"Update UI triggered for language: {language_display}")
                 language = get_language_code(language_display)
                 self.current_language = language
                     gr.update(label=translations["guiding_prompt_label"], placeholder=translations["guiding_prompt_placeholder"], info=translations["guiding_prompt_info"]),
                     gr.update(label=translations["duration_label"]),
                     gr.update(label=translations["examples_label"]),
+                    gr.update(label=translations["thinking_model_label"]),
                     translations["submit_button"],
                     gr.update(label=translations["output_label"]),
                     gr.update(label=translations["show_timestamps"])
                 ]
+                logging.info("UI elements update values prepared.")
             input_type.change(
                 fn=lambda lang_display, choice: update_input_visibility(lang_display, choice),
                     input_type, input_type,
                     file_input, text_input,
                     initial_prompt,
+                    target_duration, include_examples, use_thinking_model,
                     submit_btn, output,
                     timestamps_checkbox
                 ]
                     text_input,
                     initial_prompt,
                     target_duration,
+                    include_examples,
+                    use_thinking_model
                 ],
                 outputs=output
             )
             # Example for PDF input
+            logging.info("Setting up Gradio Examples...")
             gr.Examples(
+                examples=[[example_pdf, "", "", 30, True, True]],
+                inputs=[file_input, text_input, initial_prompt, target_duration, include_examples, use_thinking_model]
             )
+            logging.info("Gradio Examples configured.")
+        logging.info("Launching Gradio interface...")
+        # Note: Setting share=True is not recommended/supported in Spaces, but kept for consistency with original code
+        # It might generate a warning, which is expected.
         interface.launch(share=True)
+        logging.info("Gradio interface launched.")
 if __name__ == "__main__":
+    logging.info("Starting application...")
     app = TranscriptTransformerApp()
+    app.launch()
+    logging.info("Application finished.")