File size: 7,820 Bytes
96813ce
 
 
 
 
 
3e8a397
96813ce
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
f669f4b
96813ce
f669f4b
96813ce
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
f669f4b
96813ce
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
b4fd78c
96813ce
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
338edf6
 
de4500d
 
 
 
 
 
 
 
 
 
 
 
96813ce
 
 
 
 
 
 
 
 
 
 
 
 
f669f4b
96813ce
 
 
 
 
 
 
 
de4500d
96813ce
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
import gradio as gr
import os
from google import genai
from google.genai import types
from PIL import Image
from pathlib import Path
import io

def generate_visual_minutes(api_key, transcript_file):
    """
    Generate visual minutes from a transcript using the fixed template.

    The transcript text is embedded in a prompt and sent, together with the
    template image stored next to this file (gemini_template.jpg), to the
    Gemini image model. The first image part found in the response is
    converted to a PIL image and returned.

    Args:
        api_key: Google API key (surrounding whitespace is ignored)
        transcript_file: Path to uploaded transcript file

    Returns:
        PIL Image of generated visual minutes

    Raises:
        gr.Error: If an input is missing or empty, the template or the
            transcript cannot be read, the API call fails, or the response
            contains no usable image.
    """
    # Validate API key. Keys pasted into the textbox often carry stray
    # whitespace or a trailing newline, so normalise before checking.
    api_key = (api_key or "").strip()
    if not api_key:
        raise gr.Error("Please enter your Google API Key.")

    if not transcript_file:
        raise gr.Error("Please upload a transcript file.")

    # Load the fixed template from the repository
    template_path = Path(__file__).parent / "gemini_template.jpg"
    if not template_path.exists():
        raise gr.Error("Template file (gemini_template.jpg) not found in the repository.")

    try:
        # Image.open() is lazy: force the decode here so a corrupt template is
        # reported as a template problem (not as a generation failure later),
        # and let the context manager release the file handle. The pixel data
        # stays usable after the file is closed.
        with Image.open(template_path) as template_image:
            template_image.load()
    except Exception as e:
        raise gr.Error(f"Failed to load template image: {e}") from e

    # Read transcript
    try:
        # "utf-8-sig" reads plain UTF-8 and also drops a leading BOM, which
        # some Windows editors add and which would otherwise leak into the prompt.
        with open(transcript_file, "r", encoding="utf-8-sig") as f:
            transcript_content = f.read()
    except Exception as e:
        raise gr.Error(f"Failed to read transcript file: {e}") from e

    # An empty transcript gives the model nothing to summarise; fail fast
    # instead of spending an API call on it.
    if not transcript_content.strip():
        raise gr.Error("The transcript file is empty.")

    # Construct the detailed prompt
    prompt = f"""
You are an expert visual facilitator creating graphic recording minutes for a meeting.

**Task:**
Generate a final image that looks exactly like the provided layout image (`gemini_template.jpg`), but with all the boxes and sections filled in with summarized information extracted from the provided `transcript_content`.

**Input Transcript:**
\"\"\"
{transcript_content}
\"\"\"

**Layout Instructions based on the template image:**

1.  **Top Banner (Title/Date):**
    * Extract a concise, relevant Title for the meeting based on the content.
    * Extract the Date mentioned in the transcript.

2.  **Agenda Box (Top Left, blue/orange/red section):**
    * Summarize the key discussion points into 3-4 concise bullet points.

3.  **Attendees Box (Top Right):**
    * List the names of the attendees correctly.
    * Group them under their respective companies (e.g., 'Company 1: AVEVA' and 'Company 2: TOTAL') based on their introductions in the transcript.

4.  **Objective Box (Middle Right, next to target icon):**
    * Summarize the primary goal of this specific meeting in 1 short sentence.

5.  **Next Step Box (Middle Left, next to clipboard icon):**
    * Summarize the agreed-upon follow-up actions or future plans mentioned at the end of the meeting.

6.  **Notes Columns (1.NOTE1 & 2.Note2 - The vertical avatar sections):**
    * This is the most important part. Create a visual dialogue flow representing the key takeaways.
    * Use the existing empty avatar slots in the template.
    * Identify who is speaking key points in the transcript (e.g., Arthur, Marie, Antoine, Rachid).
    * Next to their corresponding avatar slot, generate a speech bubble. check that the speech bubble is in front of the corresponding avatar.
    * Inside the speech bubble, write a very concise summary of their main point.
    * **Crucial:** Add relevant, expressive emojis inside the speech bubbles to visually represent their point (e.g., brains for AI, rockets for strategy, charts for data, warnings for caution).
    * Ensure the conversation flows logically down the columns, matching the flow of the transcript.
    * Use both Notes (note 1 AND note 2) and do not leave empty spaces in these fields.

**Final Output Requirement:**
The output must be a single image that looks like the completed infographic, maintaining the original aesthetic of the template.
"""

    # Initialize client and call API
    # The user's original script used "gemini-3-pro-image-preview" which supports image generation.
    # "gemini-2.0-flash-exp" likely does not support direct image generation output.
    MODEL_NAME = "gemini-3-pro-image-preview"

    try:
        client = genai.Client(api_key=api_key)

        # The template is tall and narrow, so 9:16 aspect ratio is best suited.
        aspect_ratio = "9:16"
        resolution = "2K"

        # We pass the prompt AND the template image as contents.
        # The model uses the image as the structural constraint.
        response = client.models.generate_content(
            model=MODEL_NAME,
            contents=[
                prompt,
                template_image
            ],
            config=types.GenerateContentConfig(
                # We only want an image back, not text explaining the image
                response_modalities=['IMAGE'],
                image_config=types.ImageConfig(
                    aspect_ratio=aspect_ratio,
                    image_size=resolution
                ),
            )
        )

        # Extract and return the generated image.
        # `response.parts` may be None when the response carries no content,
        # so fall back to an empty list and reach the "no image" error below
        # instead of failing with a TypeError.
        for part in response.parts or []:
            if image := part.as_image():
                # Convert google.genai.types.Image to PIL Image for Gradio
                try:
                    # The Google GenAI SDK's image.save() expects a path, not a buffer,
                    # so read the raw bytes from the object instead.
                    image_bytes = getattr(image, 'image_bytes', None)
                    if image_bytes is None:
                        image_bytes = getattr(image, '_image_bytes', None)
                    if image_bytes is None:
                        raise ValueError("Could not extract bytes from GenAI Image object")

                    generated = Image.open(io.BytesIO(image_bytes))
                    # Decode now so a truncated/corrupt payload is reported here
                    # rather than later when the UI renders the image.
                    generated.load()
                    return generated
                except Exception as e:
                    raise gr.Error(f"Failed to process generated image: {e}") from e

        raise gr.Error("The model completed but did not return an image. Check inputs or safety filters.")

    except gr.Error:
        # Already a user-facing message raised above; re-raise untouched so it
        # is not wrapped a second time by the generic handler below.
        raise
    except Exception as e:
        raise gr.Error(f"An error occurred during generation: {e}") from e


# --- Gradio UI ---
# Two-column layout: inputs (API key, transcript upload, trigger button) on the
# left, the generated image on the right, with usage instructions underneath.
with gr.Blocks(title="Visual Minutes Generator") as demo:
    gr.Markdown("# 🖼️ Visual Minutes Generator")
    # Intro text, spelled out line by line so the whitespace that is part of
    # the string is visible in the source.
    gr.Markdown(
        "\n"
        "    Generate visual minutes from a meeting transcript using Google's Gemini model.\n"
        "    \n"
        "    **The template (gemini_template.jpg) is automatically loaded from the repository.**\n"
        "    "
    )

    with gr.Row():
        # Left column: user inputs
        with gr.Column():
            api_key_input = gr.Textbox(
                type="password",
                label="Google API Key",
                info="Your API key is required to use the model",
                placeholder="Enter your Google API Key",
            )

            # type="filepath" hands the callback a path string to the upload
            transcript_input = gr.File(
                type="filepath",
                file_types=[".txt"],
                label="Upload Meeting Transcript (.txt)",
            )

            generate_btn = gr.Button(
                "Generate Visual Minutes",
                size="lg",
                variant="primary",
            )

        # Right column: result
        with gr.Column():
            output_image = gr.Image(
                type="pil",
                label="Generated Visual Minutes",
            )

    # Usage instructions shown below the two columns
    gr.Markdown(
        "\n"
        "    ### How to use:\n"
        "    1. Enter your Google API Key (get one from [Google AI Studio](https://aistudio.google.com/app/apikey))\n"
        "    2. Upload your meeting transcript as a .txt file\n"
        '    3. Click "Generate Visual Minutes"\n'
        "    4. Wait 30-60 seconds for the AI to generate your visual minutes\n"
        "    "
    )

    # Wire the button: (api key, transcript path) -> generated image
    generate_btn.click(
        inputs=[api_key_input, transcript_input],
        outputs=output_image,
        fn=generate_visual_minutes,
    )

# Start the web server only when this file is run as a script.
if __name__ == "__main__":
    demo.launch()