c1tr0n75 committed on
Commit
96813ce
·
0 Parent(s):

feat: Update Gemini model

Browse files
Files changed (6) hide show
  1. .gitattributes +35 -0
  2. .gitignore +4 -0
  3. README.md +28 -0
  4. app.py +176 -0
  5. requirements.txt +5 -0
  6. template.jpg +0 -0
.gitattributes ADDED
@@ -0,0 +1,35 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ *.7z filter=lfs diff=lfs merge=lfs -text
2
+ *.arrow filter=lfs diff=lfs merge=lfs -text
3
+ *.bin filter=lfs diff=lfs merge=lfs -text
4
+ *.bz2 filter=lfs diff=lfs merge=lfs -text
5
+ *.ckpt filter=lfs diff=lfs merge=lfs -text
6
+ *.ftz filter=lfs diff=lfs merge=lfs -text
7
+ *.gz filter=lfs diff=lfs merge=lfs -text
8
+ *.h5 filter=lfs diff=lfs merge=lfs -text
9
+ *.joblib filter=lfs diff=lfs merge=lfs -text
10
+ *.lfs.* filter=lfs diff=lfs merge=lfs -text
11
+ *.mlmodel filter=lfs diff=lfs merge=lfs -text
12
+ *.model filter=lfs diff=lfs merge=lfs -text
13
+ *.msgpack filter=lfs diff=lfs merge=lfs -text
14
+ *.npy filter=lfs diff=lfs merge=lfs -text
15
+ *.npz filter=lfs diff=lfs merge=lfs -text
16
+ *.onnx filter=lfs diff=lfs merge=lfs -text
17
+ *.ot filter=lfs diff=lfs merge=lfs -text
18
+ *.parquet filter=lfs diff=lfs merge=lfs -text
19
+ *.pb filter=lfs diff=lfs merge=lfs -text
20
+ *.pickle filter=lfs diff=lfs merge=lfs -text
21
+ *.pkl filter=lfs diff=lfs merge=lfs -text
22
+ *.pt filter=lfs diff=lfs merge=lfs -text
23
+ *.pth filter=lfs diff=lfs merge=lfs -text
24
+ *.rar filter=lfs diff=lfs merge=lfs -text
25
+ *.safetensors filter=lfs diff=lfs merge=lfs -text
26
+ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
27
+ *.tar.* filter=lfs diff=lfs merge=lfs -text
28
+ *.tar filter=lfs diff=lfs merge=lfs -text
29
+ *.tflite filter=lfs diff=lfs merge=lfs -text
30
+ *.tgz filter=lfs diff=lfs merge=lfs -text
31
+ *.wasm filter=lfs diff=lfs merge=lfs -text
32
+ *.xz filter=lfs diff=lfs merge=lfs -text
33
+ *.zip filter=lfs diff=lfs merge=lfs -text
34
+ *.zst filter=lfs diff=lfs merge=lfs -text
35
+ *tfevents* filter=lfs diff=lfs merge=lfs -text
.gitignore ADDED
@@ -0,0 +1,4 @@
 
 
 
 
 
1
+ .env
2
+ *.pyc
3
+ __pycache__/
4
+ visual_minutes_output.png
README.md ADDED
@@ -0,0 +1,28 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ ---
2
+ title: Visual Minutes Generator
3
+ emoji: 🖼️
4
+ colorFrom: blue
5
+ colorTo: purple
6
+ sdk: gradio
7
+ sdk_version: 5.44.0
8
+ app_file: app.py
9
+ pinned: false
10
+ license: mit
11
+ short_description: A Space that generates visual minutes of meetings
12
+ ---
13
+
14
+ # Visual Minutes Generator
15
+
16
+ This is a Hugging Face Space that generates visual minutes from a meeting transcript using a template image and Google's Gemini 3 Pro model.
17
+
18
+ ## How to use
19
+
20
+ 1. Enter your Google API Key in the input box (or set it as a Secret).
21
+ 2. Upload a meeting transcript (text file).
22
+ 3. Click "Generate Visual Minutes".
23
+
24
+ *The template (template.jpg) is automatically loaded from the repository.*
25
+
26
+ ## Requirements
27
+
28
+ * Google API Key with access to the Gemini 3 Pro image model (`gemini-3-pro-image-preview`), which is the model `app.py` calls.
app.py ADDED
@@ -0,0 +1,176 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import gradio as gr
2
+ import os
3
+ from google import genai
4
+ from google.genai import types
5
+ from PIL import Image
6
+ from pathlib import Path
7
+
8
def generate_visual_minutes(api_key, transcript_file):
    """
    Generate visual minutes from a transcript using the fixed template.

    Args:
        api_key: Google API key. If empty, the ``GOOGLE_API_KEY`` and then
            ``GEMINI_API_KEY`` environment variables are used instead, so
            the key can be supplied as a Space secret.
        transcript_file: Path to uploaded transcript file (UTF-8 text, with
            or without a byte-order mark).

    Returns:
        PIL Image of generated visual minutes

    Raises:
        gr.Error: If the API key or transcript is missing or empty, the
            template or transcript cannot be read, the API call fails, or
            the response carries no decodable image.
    """
    # Local import: the module's top-level imports do not include ``io``.
    from io import BytesIO

    # Validate API key, falling back to the environment when the textbox is empty.
    api_key = (
        (api_key or "").strip()
        or os.environ.get("GOOGLE_API_KEY")
        or os.environ.get("GEMINI_API_KEY")
    )
    if not api_key:
        raise gr.Error("Please enter your Google API Key.")

    if not transcript_file:
        raise gr.Error("Please upload a transcript file.")

    # Load the fixed template from the repository
    template_path = Path(__file__).parent / "template.jpg"
    if not template_path.exists():
        raise gr.Error("Template file (template.jpg) not found in the repository.")

    try:
        # Image.open() is lazy; copy() forces the decode inside this try so a
        # corrupt template is reported here, and the ``with`` releases the
        # file handle instead of leaving it open for the process lifetime.
        with Image.open(template_path) as template_file:
            template_image = template_file.copy()
    except Exception as e:
        raise gr.Error(f"Failed to load template image: {e}") from e

    # Read transcript ("utf-8-sig" also accepts plain UTF-8 and strips a BOM
    # so it does not end up inside the prompt).
    try:
        with open(transcript_file, "r", encoding="utf-8-sig") as f:
            transcript_content = f.read()
    except Exception as e:
        raise gr.Error(f"Failed to read transcript file: {e}") from e

    if not transcript_content.strip():
        raise gr.Error("The transcript file is empty.")

    # Construct the detailed prompt
    prompt = f"""
    You are an expert visual facilitator creating graphic recording minutes for a meeting.

    **Task:**
    Generate a final image that looks exactly like the provided layout image (`template.jpg`), but with all the empty boxes and sections filled in with summarized information extracted from the provided `transcript_content`.

    **Input Transcript:**
    \"\"\"
    {transcript_content}
    \"\"\"

    **Layout Instructions based on the template image:**

    1. **Top Banner (Title/Date):**
        * Extract a concise, relevant Title for the meeting based on the content.
        * Extract the Date mentioned in the transcript.

    2. **Agenda Box (Top Left, blue/orange/red section):**
        * Summarize the key discussion points into 3-4 concise bullet points.

    3. **Attendees Box (Top Right):**
        * List the names of the attendees correctly.
        * Group them under their respective companies (e.g., 'Company 1: AVEVA' and 'Company 2: TOTAL') based on their introductions in the transcript.

    4. **Objective Box (Middle Right, next to target icon):**
        * Summarize the primary goal of this specific meeting in 1 short sentence.

    5. **Next Step Box (Middle Left, next to clipboard icon):**
        * Summarize the agreed-upon follow-up actions or future plans mentioned at the end of the meeting.

    6. **Notes Columns (1.NOTE1 & 2.Note2 - The vertical avatar sections):**
        * This is the most important part. Create a visual dialogue flow representing the key takeaways.
        * Use the existing empty avatar slots in the template.
        * Identify who is speaking key points in the transcript (e.g., Arthur, Marie, Antoine, Rachid).
        * Next to their corresponding avatar slot, generate a speech bubble. check that the speech bubble is in front of the corresponding avatar.
        * Inside the speech bubble, write a very concise summary of their main point.
        * **Crucial:** Add relevant, expressive emojis inside the speech bubbles to visually represent their point (e.g., brains for AI, rockets for strategy, charts for data, warnings for caution).
        * Ensure the conversation flows logically down the columns, matching the flow of the transcript.

    **Final Output Requirement:**
    The output must be a single image that looks like the completed infographic, maintaining the original aesthetic of the template.
    """

    # Initialize client and call API
    # The user's original script used "gemini-3-pro-image-preview" which supports image generation.
    # "gemini-2.0-flash-exp" likely does not support direct image generation output.
    MODEL_NAME = "gemini-3-pro-image-preview"

    # The template is tall and narrow, so 9:16 aspect ratio is best suited.
    aspect_ratio = "9:16"
    resolution = "2K"

    # Only the client construction and network call live in this try, so the
    # dedicated "no image returned" error below is not re-wrapped by it.
    try:
        client = genai.Client(api_key=api_key)

        # We pass the prompt AND the template image as contents.
        # The model uses the image as the structural constraint.
        response = client.models.generate_content(
            model=MODEL_NAME,
            contents=[
                prompt,
                template_image,
            ],
            config=types.GenerateContentConfig(
                # We only want an image back, not text explaining the image
                response_modalities=['IMAGE'],
                image_config=types.ImageConfig(
                    aspect_ratio=aspect_ratio,
                    image_size=resolution,
                ),
            ),
        )
    except Exception as e:
        raise gr.Error(f"An error occurred during generation: {e}") from e

    # Extract and return the generated image. ``response.parts`` is guarded
    # with ``or []`` in case it is None (e.g. a blocked response); the SDK
    # behavior is assumed here, not confirmed.
    for part in response.parts or []:
        blob = part.inline_data
        if blob is None or not blob.data:
            continue
        if blob.mime_type and not blob.mime_type.startswith("image/"):
            continue
        try:
            # Decode the raw bytes with Pillow so the return value is a real
            # PIL image (as documented, and as the gr.Image output expects)
            # regardless of what wrapper type the SDK's part helpers return.
            generated_image = Image.open(BytesIO(blob.data))
            generated_image.load()
        except Exception as e:
            raise gr.Error(f"An error occurred during generation: {e}") from e
        return generated_image

    raise gr.Error("The model completed but did not return an image. Check inputs or safety filters.")
126
+
127
+
128
+ # --- Gradio UI ---
129
with gr.Blocks(title="Visual Minutes Generator") as demo:
    # Page header: app title plus a note on where the layout template comes from.
    gr.Markdown("# 🖼️ Visual Minutes Generator")
    gr.Markdown("""
    Generate visual minutes from a meeting transcript using Google's Gemini model.

    **The template (template.jpg) is automatically loaded from the repository.**
    """)

    with gr.Row():
        # Left column: the two user inputs and the trigger button.
        with gr.Column():
            api_key_input = gr.Textbox(
                type="password",
                label="Google API Key",
                placeholder="Enter your Google API Key",
                info="Your API key is required to use the Gemini model",
            )

            transcript_input = gr.File(
                type="filepath",
                file_types=[".txt"],
                label="Upload Meeting Transcript (.txt)",
            )

            generate_btn = gr.Button(
                "Generate Visual Minutes",
                size="lg",
                variant="primary",
            )

        # Right column: the generated infographic.
        with gr.Column():
            output_image = gr.Image(type="pil", label="Generated Visual Minutes")

    # Usage instructions shown below the input/output row.
    gr.Markdown("""
    ### How to use:
    1. Enter your Google API Key (get one from [Google AI Studio](https://aistudio.google.com/app/apikey))
    2. Upload your meeting transcript as a .txt file
    3. Click "Generate Visual Minutes"
    4. Wait 30-60 seconds for the AI to generate your visual minutes
    """)

    # Wire the button: (API key, transcript path) -> generated image.
    generate_btn.click(
        generate_visual_minutes,
        [api_key_input, transcript_input],
        output_image,
    )

# Start the Gradio server only when run as a script.
if __name__ == "__main__":
    demo.launch()
requirements.txt ADDED
@@ -0,0 +1,5 @@
 
 
 
 
 
 
1
google-genai
pillow
# pathlib is part of the Python 3 standard library; the PyPI "pathlib"
# package is an obsolete backport and must not be installed here.
python-dotenv
gradio
template.jpg ADDED