"""Gradio app that turns a meeting transcript into "visual minutes".

The app sends a fixed layout image (``gemini_template.jpg``, stored next to
this file) together with a detailed prompt to a Gemini image model and shows
the image the model returns.
"""

import io
import os
from pathlib import Path

import gradio as gr
from google import genai
from google.genai import types
from PIL import Image

# Image-capable Gemini model used for generation.
MODEL_NAME = "gemini-3-pro-image-preview"
# The template is tall and narrow, so a portrait aspect ratio is requested.
ASPECT_RATIO = "9:16"
RESOLUTION = "2K"
TEMPLATE_FILENAME = "gemini_template.jpg"


def _load_template():
    """Open and fully decode the fixed template image stored beside this file.

    Raises:
        gr.Error: If the template file is missing or cannot be decoded.
    """
    template_path = Path(__file__).parent / TEMPLATE_FILENAME
    if not template_path.exists():
        raise gr.Error("Template file (gemini_template.jpg) not found in the repository.")
    try:
        template_image = Image.open(template_path)
        # Image.open is lazy: force the decode now so a corrupt file is
        # reported here and the underlying file handle is released.
        template_image.load()
    except Exception as e:
        raise gr.Error(f"Failed to load template image: {e}") from e
    return template_image


def _read_transcript(transcript_file):
    """Return the UTF-8 text of the uploaded transcript.

    Raises:
        gr.Error: If the file cannot be read or contains only whitespace.
    """
    try:
        with open(transcript_file, "r", encoding="utf-8") as f:
            transcript_content = f.read()
    except Exception as e:
        raise gr.Error(f"Failed to read transcript file: {e}") from e
    if not transcript_content.strip():
        raise gr.Error("The transcript file is empty.")
    return transcript_content


def _build_prompt(transcript_content):
    """Return the generation prompt with the transcript embedded in it."""
    # The text is kept flush-left so no source indentation leaks into the prompt.
    return f"""
You are an expert visual facilitator creating graphic recording minutes for a meeting.

**Task:**
Generate a final image that looks exactly like the provided layout image (`gemini_template.jpg`), but with all the boxes and sections filled in with summarized information extracted from the provided `transcript_content`.

**Input Transcript:**
\"\"\"
{transcript_content}
\"\"\"

**Layout Instructions based on the template image:**

1. **Top Banner (Title/Date):**
    * Extract a concise, relevant Title for the meeting based on the content.
    * Extract the Date mentioned in the transcript.

2. **Agenda Box (Top Left, blue/orange/red section):**
    * Summarize the key discussion points into 3-4 concise bullet points.

3. **Attendees Box (Top Right):**
    * List the names of the attendees correctly.
    * Group them under their respective companies (e.g., 'Company 1: AVEVA' and 'Company 2: TOTAL') based on their introductions in the transcript.

4. **Objective Box (Middle Right, next to target icon):**
    * Summarize the primary goal of this specific meeting in 1 short sentence.

5. **Next Step Box (Middle Left, next to clipboard icon):**
    * Summarize the agreed-upon follow-up actions or future plans mentioned at the end of the meeting.

6. **Notes Columns (1.NOTE1 & 2.Note2 - The vertical avatar sections):**
    * This is the most important part. Create a visual dialogue flow representing the key takeaways.
    * Use the existing empty avatar slots in the template.
    * Identify who is speaking key points in the transcript (e.g., Arthur, Marie, Antoine, Rachid).
    * Next to their corresponding avatar slot, generate a speech bubble. check that the speech bubble is in front of the corresponding avatar.
    * Inside the speech bubble, write a very concise summary of their main point.
    * **Crucial:** Add relevant, expressive emojis inside the speech bubbles to visually represent their point (e.g., brains for AI, rockets for strategy, charts for data, warnings for caution).
    * Ensure the conversation flows logically down the columns, matching the flow of the transcript.
    * Use both Notes (note 1 AND note 2) and do not leave empty spaces in these fields.

**Final Output Requirement:**
The output must be a single image that looks like the completed infographic, maintaining the original aesthetic of the template.
"""


def _extract_image(response):
    """Return the first image found in the model response as a PIL image.

    Raises:
        gr.Error: If an image part cannot be converted, or if the response
            contains no image part at all.
    """
    # `parts` may be None (for example when no candidate came back), so fall
    # back to an empty list and report "no image" instead of a TypeError.
    for part in response.parts or []:
        try:
            image = part.as_image()
            if not image:
                continue
            # The SDK image object is converted through its raw bytes; the
            # private attribute is kept as a fallback for other SDK versions.
            image_bytes = (
                getattr(image, "image_bytes", None)
                or getattr(image, "_image_bytes", None)
            )
            if not image_bytes:
                raise ValueError("Could not extract bytes from GenAI Image object")
            return Image.open(io.BytesIO(image_bytes))
        except Exception as e:
            raise gr.Error(f"Failed to process generated image: {e}") from e

    raise gr.Error("The model completed but did not return an image. Check inputs or safety filters.")


def generate_visual_minutes(api_key, transcript_file):
    """Generate visual minutes from a transcript using the fixed template.

    Args:
        api_key: Google API key.
        transcript_file: Path to the uploaded transcript file.

    Returns:
        PIL Image of the generated visual minutes.

    Raises:
        gr.Error: On missing inputs, a missing/unreadable template, an
            unreadable or empty transcript, an API failure, or a response
            that carries no usable image.
    """
    # Validate inputs; a whitespace-only key is treated as missing.
    api_key = (api_key or "").strip()
    if not api_key:
        raise gr.Error("Please enter your Google API Key.")
    if not transcript_file:
        raise gr.Error("Please upload a transcript file.")

    template_image = _load_template()
    transcript_content = _read_transcript(transcript_file)
    prompt = _build_prompt(transcript_content)

    # Only the API call sits inside this try, so the specific gr.Error
    # messages raised while extracting the image are not re-wrapped as a
    # generic generation failure.
    try:
        client = genai.Client(api_key=api_key)
        # The prompt AND the template image are passed as contents; the
        # image acts as the structural constraint for the model.
        response = client.models.generate_content(
            model=MODEL_NAME,
            contents=[prompt, template_image],
            config=types.GenerateContentConfig(
                # Only an image is wanted back, not text explaining it.
                response_modalities=["IMAGE"],
                image_config=types.ImageConfig(
                    aspect_ratio=ASPECT_RATIO,
                    image_size=RESOLUTION,
                ),
            ),
        )
    except Exception as e:
        raise gr.Error(f"An error occurred during generation: {e}") from e

    return _extract_image(response)


# --- Gradio UI ---
with gr.Blocks(title="Visual Minutes Generator") as demo:
    gr.Markdown("# 🖼️ Visual Minutes Generator")
    gr.Markdown("""
    Generate visual minutes from a meeting transcript using Google's Gemini model.

    **The template (gemini_template.jpg) is automatically loaded from the repository.**
    """)

    with gr.Row():
        with gr.Column():
            api_key_input = gr.Textbox(
                label="Google API Key",
                type="password",
                placeholder="Enter your Google API Key",
                info="Your API key is required to use the model",
            )
            transcript_input = gr.File(
                label="Upload Meeting Transcript (.txt)",
                file_types=[".txt"],
                type="filepath",
            )
            generate_btn = gr.Button("Generate Visual Minutes", variant="primary", size="lg")

        with gr.Column():
            output_image = gr.Image(
                label="Generated Visual Minutes",
                type="pil",
            )

    gr.Markdown("""
    ### How to use:
    1. Enter your Google API Key (get one from [Google AI Studio](https://aistudio.google.com/app/apikey))
    2. Upload your meeting transcript as a .txt file
    3. Click "Generate Visual Minutes"
    4. Wait 30-60 seconds for the AI to generate your visual minutes
    """)

    # Connect the button to the function
    generate_btn.click(
        fn=generate_visual_minutes,
        inputs=[api_key_input, transcript_input],
        outputs=output_image,
    )


if __name__ == "__main__":
    demo.launch()