# VisualMoM / app.py
# Last commit: "adding new gemini template" (f669f4b, verified) by c1tr0n75
import gradio as gr
import os
from google import genai
from google.genai import types
from PIL import Image
from pathlib import Path
import io
def _build_prompt(transcript_content):
    """Return the instruction prompt with the transcript text embedded in it."""
    # The literal's lines start at column 0 on purpose: everything between the
    # triple quotes is sent to the model verbatim.
    return f"""
You are an expert visual facilitator creating graphic recording minutes for a meeting.
**Task:**
Generate a final image that looks exactly like the provided layout image (`gemini_template.jpg`), but with all the boxes and sections filled in with summarized information extracted from the provided `transcript_content`.
**Input Transcript:**
\"\"\"
{transcript_content}
\"\"\"
**Layout Instructions based on the template image:**
1. **Top Banner (Title/Date):**
* Extract a concise, relevant Title for the meeting based on the content.
* Extract the Date mentioned in the transcript.
2. **Agenda Box (Top Left, blue/orange/red section):**
* Summarize the key discussion points into 3-4 concise bullet points.
3. **Attendees Box (Top Right):**
* List the names of the attendees correctly.
* Group them under their respective companies (e.g., 'Company 1: AVEVA' and 'Company 2: TOTAL') based on their introductions in the transcript.
4. **Objective Box (Middle Right, next to target icon):**
* Summarize the primary goal of this specific meeting in 1 short sentence.
5. **Next Step Box (Middle Left, next to clipboard icon):**
* Summarize the agreed-upon follow-up actions or future plans mentioned at the end of the meeting.
6. **Notes Columns (1.NOTE1 & 2.Note2 - The vertical avatar sections):**
* This is the most important part. Create a visual dialogue flow representing the key takeaways.
* Use the existing empty avatar slots in the template.
* Identify who is speaking key points in the transcript (e.g., Arthur, Marie, Antoine, Rachid).
* Next to their corresponding avatar slot, generate a speech bubble. check that the speech bubble is in front of the corresponding avatar.
* Inside the speech bubble, write a very concise summary of their main point.
* **Crucial:** Add relevant, expressive emojis inside the speech bubbles to visually represent their point (e.g., brains for AI, rockets for strategy, charts for data, warnings for caution).
* Ensure the conversation flows logically down the columns, matching the flow of the transcript.
* Use both Notes (note 1 AND note 2) and do not leave empty spaces in these fields.
**Final Output Requirement:**
The output must be a single image that looks like the completed infographic, maintaining the original aesthetic of the template.
"""


def _extract_generated_image(response):
    """Return the first image in ``response`` as a PIL image, or None if there is none.

    Raises:
        gr.Error: If an image part is present but its bytes cannot be read
            or decoded.
    """
    # NOTE(review): `response.parts` is expected to be None when the response
    # carries no content (e.g. a blocked prompt) -- confirm against the SDK.
    # `or []` lets that case fall through to the caller's "no image" message
    # instead of raising TypeError.
    for part in response.parts or []:
        image = part.as_image()
        if not image:
            continue
        # Convert the SDK image object to a PIL image for Gradio.
        try:
            image_bytes = (
                getattr(image, "image_bytes", None)
                or getattr(image, "_image_bytes", None)
            )
            if not image_bytes:
                raise ValueError("Could not extract bytes from GenAI Image object")
            pil_image = Image.open(io.BytesIO(image_bytes))
            # Image.open is lazy; decode now so corrupt data is reported here
            # rather than later while Gradio renders the output.
            pil_image.load()
            return pil_image
        except Exception as e:
            raise gr.Error(f"Failed to process generated image: {e}") from e
    return None


def generate_visual_minutes(api_key, transcript_file):
    """
    Generate visual minutes from a transcript using the fixed template.

    Args:
        api_key: Google API key
        transcript_file: Path to uploaded transcript file

    Returns:
        PIL Image of generated visual minutes

    Raises:
        gr.Error: If an input is missing or empty, the template or transcript
            cannot be read, the API call fails, or no usable image comes back.
    """
    # Validate inputs; a whitespace-only key is treated as missing.
    api_key = (api_key or "").strip()
    if not api_key:
        raise gr.Error("Please enter your Google API Key.")
    if not transcript_file:
        raise gr.Error("Please upload a transcript file.")

    # Load the fixed template that sits next to this file.
    template_path = Path(__file__).parent / "gemini_template.jpg"
    if not template_path.exists():
        raise gr.Error("Template file (gemini_template.jpg) not found in the repository.")
    try:
        template_image = Image.open(template_path)
        # Force the lazy read now so a corrupt template is reported by this
        # handler and the underlying file is no longer needed afterwards.
        template_image.load()
    except Exception as e:
        raise gr.Error(f"Failed to load template image: {e}") from e

    # Read transcript. "utf-8-sig" also accepts plain UTF-8 and drops a
    # leading byte-order mark so it does not leak into the prompt.
    try:
        with open(transcript_file, "r", encoding="utf-8-sig") as f:
            transcript_content = f.read()
    except Exception as e:
        raise gr.Error(f"Failed to read transcript file: {e}") from e
    if not transcript_content.strip():
        # Nothing to summarize; avoid spending an API call on an empty prompt.
        raise gr.Error("The transcript file is empty.")

    prompt = _build_prompt(transcript_content)

    # "gemini-3-pro-image-preview" is used because it supports image output.
    MODEL_NAME = "gemini-3-pro-image-preview"
    # The template is tall and narrow, so a 9:16 aspect ratio suits it best.
    aspect_ratio = "9:16"
    resolution = "2K"

    try:
        client = genai.Client(api_key=api_key)
        # The prompt AND the template image are both passed as contents; the
        # image acts as the structural constraint for the model.
        response = client.models.generate_content(
            model=MODEL_NAME,
            contents=[
                prompt,
                template_image,
            ],
            config=types.GenerateContentConfig(
                # We only want an image back, not text explaining the image.
                response_modalities=['IMAGE'],
                image_config=types.ImageConfig(
                    aspect_ratio=aspect_ratio,
                    image_size=resolution,
                ),
            ),
        )
        generated_image = _extract_generated_image(response)
    except gr.Error:
        # Already a user-facing message; wrapping it again would bury it
        # behind a generic "An error occurred during generation" prefix.
        raise
    except Exception as e:
        raise gr.Error(f"An error occurred during generation: {e}") from e

    if generated_image is None:
        raise gr.Error("The model completed but did not return an image. Check inputs or safety filters.")
    return generated_image
# --- Gradio UI ---
# Longer Markdown texts are named up front so the layout below reads as a
# plain outline of the page.
_INTRO_MARKDOWN = """
Generate visual minutes from a meeting transcript using Google's Gemini model.
**The template (gemini_template.jpg) is automatically loaded from the repository.**
"""

_USAGE_MARKDOWN = """
### How to use:
1. Enter your Google API Key (get one from [Google AI Studio](https://aistudio.google.com/app/apikey))
2. Upload your meeting transcript as a .txt file
3. Click "Generate Visual Minutes"
4. Wait 30-60 seconds for the AI to generate your visual minutes
"""

demo = gr.Blocks(title="Visual Minutes Generator")

with demo:
    # Page heading and short description.
    gr.Markdown("# 🖼️ Visual Minutes Generator")
    gr.Markdown(_INTRO_MARKDOWN)

    with gr.Row():
        # Left column: the two inputs and the trigger button.
        with gr.Column():
            api_key_input = gr.Textbox(
                type="password",
                label="Google API Key",
                placeholder="Enter your Google API Key",
                info="Your API key is required to use the model",
            )
            transcript_input = gr.File(
                type="filepath",
                label="Upload Meeting Transcript (.txt)",
                file_types=[".txt"],
            )
            generate_btn = gr.Button(
                "Generate Visual Minutes",
                size="lg",
                variant="primary",
            )

        # Right column: the generated image.
        with gr.Column():
            output_image = gr.Image(
                type="pil",
                label="Generated Visual Minutes",
            )

    gr.Markdown(_USAGE_MARKDOWN)

    # Clicking the button runs the generator with the key and the transcript
    # path, and shows the returned image in the output component.
    generate_btn.click(
        generate_visual_minutes,
        [api_key_input, transcript_input],
        output_image,
    )

# Start the app only when this file is executed directly.
if __name__ == "__main__":
    demo.launch()