|
|
import gradio as gr |
|
|
import os |
|
|
from google import genai |
|
|
from google.genai import types |
|
|
from PIL import Image |
|
|
from pathlib import Path |
|
|
import io |
|
|
|
|
|
def _build_prompt(transcript_content):
    """Return the instruction prompt for the model with the transcript embedded.

    The lines of the literal are deliberately flush-left: everything between
    the triple quotes is sent to the model verbatim, so the indentation of the
    surrounding code must not leak into it.
    """
    return f"""
You are an expert visual facilitator creating graphic recording minutes for a meeting.

**Task:**
Generate a final image that looks exactly like the provided layout image (`gemini_template.jpg`), but with all the boxes and sections filled in with summarized information extracted from the provided `transcript_content`.

**Input Transcript:**
\"\"\"
{transcript_content}
\"\"\"

**Layout Instructions based on the template image:**

1. **Top Banner (Title/Date):**
* Extract a concise, relevant Title for the meeting based on the content.
* Extract the Date mentioned in the transcript.

2. **Agenda Box (Top Left, blue/orange/red section):**
* Summarize the key discussion points into 3-4 concise bullet points.

3. **Attendees Box (Top Right):**
* List the names of the attendees correctly.
* Group them under their respective companies (e.g., 'Company 1: AVEVA' and 'Company 2: TOTAL') based on their introductions in the transcript.

4. **Objective Box (Middle Right, next to target icon):**
* Summarize the primary goal of this specific meeting in 1 short sentence.

5. **Next Step Box (Middle Left, next to clipboard icon):**
* Summarize the agreed-upon follow-up actions or future plans mentioned at the end of the meeting.

6. **Notes Columns (1.NOTE1 & 2.Note2 - The vertical avatar sections):**
* This is the most important part. Create a visual dialogue flow representing the key takeaways.
* Use the existing empty avatar slots in the template.
* Identify who is speaking key points in the transcript (e.g., Arthur, Marie, Antoine, Rachid).
* Next to their corresponding avatar slot, generate a speech bubble. check that the speech bubble is in front of the corresponding avatar.
* Inside the speech bubble, write a very concise summary of their main point.
* **Crucial:** Add relevant, expressive emojis inside the speech bubbles to visually represent their point (e.g., brains for AI, rockets for strategy, charts for data, warnings for caution).
* Ensure the conversation flows logically down the columns, matching the flow of the transcript.
* Use both Notes (note 1 AND note 2) and do not leave empty spaces in these fields.

**Final Output Requirement:**
The output must be a single image that looks like the completed infographic, maintaining the original aesthetic of the template.
"""


def _decode_first_image(parts):
    """Return the first image found in *parts* as a PIL image, or None if there is none.

    Raises:
        gr.Error: If a part carries an image whose bytes cannot be extracted
            or decoded.
    """
    for part in parts:
        # Parts without image data yield a falsy value here and are skipped.
        if not (image := part.as_image()):
            continue
        try:
            # Prefer the public attribute; `_image_bytes` is kept as a fallback
            # for SDK objects that only expose the private one.
            raw = getattr(image, "image_bytes", None) or getattr(image, "_image_bytes", None)
            if not raw:
                raise ValueError("Could not extract bytes from GenAI Image object")
            decoded = Image.open(io.BytesIO(raw))
            # Image.open is lazy; decode now so corrupt data is reported here.
            decoded.load()
            return decoded
        except Exception as e:
            raise gr.Error(f"Failed to process generated image: {e}") from e
    return None


def generate_visual_minutes(api_key, transcript_file):
    """
    Generate visual minutes from a transcript using the fixed template.

    The template image (gemini_template.jpg, located next to this file) and the
    transcript text are sent to the model together with a layout prompt, and
    the first image found in the response is returned.

    Args:
        api_key: Google API key
        transcript_file: Path to uploaded transcript file

    Returns:
        PIL Image of generated visual minutes

    Raises:
        gr.Error: If an input is missing or empty, the template or transcript
            cannot be read, the model call fails, or the response contains no
            usable image.
    """
    # A pasted key often carries stray whitespace; a blank key is "missing".
    if isinstance(api_key, str):
        api_key = api_key.strip()
    if not api_key:
        raise gr.Error("Please enter your Google API Key.")

    if not transcript_file:
        raise gr.Error("Please upload a transcript file.")

    template_path = Path(__file__).parent / "gemini_template.jpg"
    if not template_path.exists():
        raise gr.Error("Template file (gemini_template.jpg) not found in the repository.")

    try:
        template_image = Image.open(template_path)
        # Image.open only identifies the file; load() decodes it now so that a
        # corrupt template is reported here and the file handle is released.
        template_image.load()
    except Exception as e:
        raise gr.Error(f"Failed to load template image: {e}") from e

    try:
        # utf-8-sig reads plain UTF-8 unchanged and drops a leading BOM, which
        # would otherwise end up inside the prompt.
        with open(transcript_file, "r", encoding="utf-8-sig") as f:
            transcript_content = f.read()
    except Exception as e:
        raise gr.Error(f"Failed to read transcript file: {e}") from e

    # Avoid spending a model call on a transcript with nothing in it.
    if not transcript_content.strip():
        raise gr.Error("The uploaded transcript file is empty.")

    prompt = _build_prompt(transcript_content)

    model_name = "gemini-3-pro-image-preview"
    aspect_ratio = "9:16"
    resolution = "2K"

    try:
        client = genai.Client(api_key=api_key)
        response = client.models.generate_content(
            model=model_name,
            contents=[prompt, template_image],
            config=types.GenerateContentConfig(
                response_modalities=['IMAGE'],
                image_config=types.ImageConfig(
                    aspect_ratio=aspect_ratio,
                    image_size=resolution,
                ),
            ),
        )
        # `or []` guards against a response that carries no parts at all.
        generated = _decode_first_image(response.parts or [])
    except gr.Error:
        # Already a user-facing message; do not wrap it a second time.
        raise
    except Exception as e:
        raise gr.Error(f"An error occurred during generation: {e}") from e

    if generated is None:
        raise gr.Error("The model completed but did not return an image. Check inputs or safety filters.")
    return generated
|
|
|
|
|
|
|
|
|
|
|
# ---------------------------------------------------------------------------
# Gradio interface: inputs on the left, generated image on the right.
# ---------------------------------------------------------------------------
with gr.Blocks(title="Visual Minutes Generator") as demo:
    # Page header and short description.
    gr.Markdown("# 🖼️ Visual Minutes Generator")
    gr.Markdown(
        """
    Generate visual minutes from a meeting transcript using Google's Gemini model.

    **The template (gemini_template.jpg) is automatically loaded from the repository.**
    """
    )

    with gr.Row():
        # Left column: credentials, transcript upload and the trigger button.
        with gr.Column():
            api_key_input = gr.Textbox(
                type="password",
                label="Google API Key",
                placeholder="Enter your Google API Key",
                info="Your API key is required to use the model",
            )
            transcript_input = gr.File(
                type="filepath",
                label="Upload Meeting Transcript (.txt)",
                file_types=[".txt"],
            )
            generate_btn = gr.Button(
                "Generate Visual Minutes",
                variant="primary",
                size="lg",
            )

        # Right column: the generated infographic.
        with gr.Column():
            output_image = gr.Image(type="pil", label="Generated Visual Minutes")

    # Usage instructions shown below the two columns.
    gr.Markdown(
        """
    ### How to use:
    1. Enter your Google API Key (get one from [Google AI Studio](https://aistudio.google.com/app/apikey))
    2. Upload your meeting transcript as a .txt file
    3. Click "Generate Visual Minutes"
    4. Wait 30-60 seconds for the AI to generate your visual minutes
    """
    )

    # Clicking the button passes the key and transcript path to the generator
    # and displays its result in the image component.
    generate_btn.click(
        fn=generate_visual_minutes,
        inputs=[api_key_input, transcript_input],
        outputs=output_image,
    )


# Start the app only when this file is run as a script.
if __name__ == "__main__":
    demo.launch()
|
|
|