# Source: Hugging Face Spaces - c1tr0n75/VisualMoM (Space status: Sleeping)
# File size: 7,820 Bytes

import gradio as gr
import os
from google import genai
from google.genai import types
from PIL import Image
from pathlib import Path
import io

def generate_visual_minutes(api_key, transcript_file):
    """
    Generate visual minutes from a transcript using the fixed template.

    The transcript text and the repository's ``gemini_template.jpg`` are sent
    together to the Gemini image model. The first image part found in the
    response is converted to a PIL image and returned.

    Args:
        api_key: Google API key
        transcript_file: Path to uploaded transcript file

    Returns:
        PIL Image of generated visual minutes

    Raises:
        gr.Error: If an input is missing or blank, the template or transcript
            cannot be read, the API call fails, or the response contains no
            usable image.
    """
    # Validate API key (a whitespace-only key is treated as missing)
    api_key = (api_key or "").strip()
    if not api_key:
        raise gr.Error("Please enter your Google API Key.")

    if not transcript_file:
        raise gr.Error("Please upload a transcript file.")

    # Load the fixed template from the repository
    template_path = Path(__file__).parent / "gemini_template.jpg"
    if not template_path.exists():
        raise gr.Error("Template file (gemini_template.jpg) not found in the repository.")

    try:
        template_image = Image.open(template_path)
        # Image.open is lazy: decode now so a corrupt template is reported
        # with this message instead of surfacing later as a generation error.
        template_image.load()
    except Exception as e:
        raise gr.Error(f"Failed to load template image: {e}") from e

    # Read transcript
    try:
        # "utf-8-sig" reads plain UTF-8 too, and drops a leading BOM so it
        # does not end up inside the prompt.
        with open(transcript_file, "r", encoding="utf-8-sig") as f:
            transcript_content = f.read()
    except Exception as e:
        raise gr.Error(f"Failed to read transcript file: {e}") from e

    # Fail fast instead of spending an API call on an empty prompt.
    if not transcript_content.strip():
        raise gr.Error("The transcript file is empty.")

    # Construct the detailed prompt
    prompt = f"""
You are an expert visual facilitator creating graphic recording minutes for a meeting.

**Task:**
Generate a final image that looks exactly like the provided layout image (`gemini_template.jpg`), but with all the boxes and sections filled in with summarized information extracted from the provided `transcript_content`.

**Input Transcript:**
\"\"\"
{transcript_content}
\"\"\"

**Layout Instructions based on the template image:**

1.  **Top Banner (Title/Date):**
    * Extract a concise, relevant Title for the meeting based on the content.
    * Extract the Date mentioned in the transcript.

2.  **Agenda Box (Top Left, blue/orange/red section):**
    * Summarize the key discussion points into 3-4 concise bullet points.

3.  **Attendees Box (Top Right):**
    * List the names of the attendees correctly.
    * Group them under their respective companies (e.g., 'Company 1: AVEVA' and 'Company 2: TOTAL') based on their introductions in the transcript.

4.  **Objective Box (Middle Right, next to target icon):**
    * Summarize the primary goal of this specific meeting in 1 short sentence.

5.  **Next Step Box (Middle Left, next to clipboard icon):**
    * Summarize the agreed-upon follow-up actions or future plans mentioned at the end of the meeting.

6.  **Notes Columns (1.NOTE1 & 2.Note2 - The vertical avatar sections):**
    * This is the most important part. Create a visual dialogue flow representing the key takeaways.
    * Use the existing empty avatar slots in the template.
    * Identify who is speaking key points in the transcript (e.g., Arthur, Marie, Antoine, Rachid).
    * Next to their corresponding avatar slot, generate a speech bubble. check that the speech bubble is in front of the corresponding avatar.
    * Inside the speech bubble, write a very concise summary of their main point.
    * **Crucial:** Add relevant, expressive emojis inside the speech bubbles to visually represent their point (e.g., brains for AI, rockets for strategy, charts for data, warnings for caution).
    * Ensure the conversation flows logically down the columns, matching the flow of the transcript.
    * Use both Notes (note 1 AND note 2) and do not leave empty spaces in these fields.

**Final Output Requirement:**
The output must be a single image that looks like the completed infographic, maintaining the original aesthetic of the template.
"""

    # Initialize client and call API
    # "gemini-3-pro-image-preview" is used because it supports image output;
    # "gemini-2.0-flash-exp" likely does not support direct image generation output.
    MODEL_NAME = "gemini-3-pro-image-preview"

    try:
        client = genai.Client(api_key=api_key)

        # The template is tall and narrow, so 9:16 aspect ratio is best suited.
        aspect_ratio = "9:16"
        resolution = "2K"

        # We pass the prompt AND the template image as contents.
        # The model uses the image as the structural constraint.
        response = client.models.generate_content(
            model=MODEL_NAME,
            contents=[
                prompt,
                template_image
            ],
            config=types.GenerateContentConfig(
                # We only want an image back, not text explaining the image
                response_modalities=['IMAGE'],
                image_config=types.ImageConfig(
                    aspect_ratio=aspect_ratio,
                    image_size=resolution
                ),
            )
        )

        # Extract and return the generated image.
        # `response.parts` can be None (e.g. no candidate content), so fall
        # back to an empty list and let the "no image" error below fire.
        for part in response.parts or []:
            if image := part.as_image():
                # Convert google.genai.types.Image to PIL Image for Gradio
                try:
                    # The raw bytes are read straight from the SDK object;
                    # the private attribute is only a fallback.
                    image_bytes = (
                        getattr(image, 'image_bytes', None)
                        or getattr(image, '_image_bytes', None)
                    )
                    if not image_bytes:
                        raise ValueError("Could not extract bytes from GenAI Image object")
                    return Image.open(io.BytesIO(image_bytes))
                except Exception as e:
                    raise gr.Error(f"Failed to process generated image: {e}") from e

        raise gr.Error("The model completed but did not return an image. Check inputs or safety filters.")

    except gr.Error:
        # Already a user-facing message; do not wrap it a second time below.
        raise
    except Exception as e:
        raise gr.Error(f"An error occurred during generation: {e}") from e


# --- Gradio UI ---
# Layout: a header, then one row with two columns (inputs and the trigger
# button on the left, the resulting image on the right), then usage notes.
with gr.Blocks(title="Visual Minutes Generator") as demo:
    gr.Markdown(value="# 🖼️ Visual Minutes Generator")
    gr.Markdown(
        value="""
    Generate visual minutes from a meeting transcript using Google's Gemini model.
    
    **The template (gemini_template.jpg) is automatically loaded from the repository.**
    """
    )

    with gr.Row():
        # Left column: credentials, transcript upload and the action button.
        with gr.Column():
            api_key_input = gr.Textbox(
                type="password",
                label="Google API Key",
                info="Your API key is required to use the model",
                placeholder="Enter your Google API Key",
            )
            transcript_input = gr.File(
                type="filepath",
                file_types=[".txt"],
                label="Upload Meeting Transcript (.txt)",
            )
            generate_btn = gr.Button(
                value="Generate Visual Minutes",
                size="lg",
                variant="primary",
            )

        # Right column: where the generated PIL image is displayed.
        with gr.Column():
            output_image = gr.Image(type="pil", label="Generated Visual Minutes")

    gr.Markdown(
        value="""
    ### How to use:
    1. Enter your Google API Key (get one from [Google AI Studio](https://aistudio.google.com/app/apikey))
    2. Upload your meeting transcript as a .txt file
    3. Click "Generate Visual Minutes"
    4. Wait 30-60 seconds for the AI to generate your visual minutes
    """
    )

    # Wire the button: (API key, transcript path) -> generated image.
    generate_btn.click(
        generate_visual_minutes,
        [api_key_input, transcript_input],
        output_image,
    )

# Start the app only when this file is run as a script.
if __name__ == "__main__":
    demo.launch()