Chris Addis commited on
Commit
9883bdb
·
1 Parent(s): eec37f2

base version

Browse files
.ipynb_checkpoints/README-checkpoint.md ADDED
@@ -0,0 +1,12 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ ---
2
+ title: Alt Text Gen
3
+ emoji: 📈
4
+ colorFrom: blue
5
+ colorTo: indigo
6
+ sdk: gradio
7
+ sdk_version: 5.24.0
8
+ app_file: app.py
9
+ pinned: false
10
+ ---
11
+
12
+ Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference
.ipynb_checkpoints/app-checkpoint.py ADDED
@@ -0,0 +1,27 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import gradio as gr
2
+
3
def process_image(image):
    """Identity passthrough: echo the uploaded image back for display."""
    return image
6
+
7
# Create the Gradio interface: a two-column layout where an uploaded image is
# echoed into an output panel on button click (checkpoint/base version of app.py).
with gr.Blocks() as demo:
    gr.Markdown("# Image Uploader and Viewer")

    with gr.Row():
        # Left column: upload control.
        with gr.Column():
            input_image = gr.Image(type="pil", label="Upload an image")
            upload_button = gr.Button("Display Image")

        # Right column: display panel.
        with gr.Column():
            output_image = gr.Image(label="Displayed Image")

    # Clicking the button runs process_image (an identity function), so the
    # uploaded image is simply mirrored into the output panel.
    upload_button.click(
        fn=process_image,
        inputs=input_image,
        outputs=output_image
    )

# Launch the app only when executed as a script (not on import).
if __name__ == "__main__":
    demo.launch()
.ipynb_checkpoints/requirements-checkpoint.txt ADDED
@@ -0,0 +1,2 @@
 
 
 
1
+ gradio==5.24.0
2
+ pillow
app.py CHANGED
@@ -1,27 +1,498 @@
1
  import gradio as gr
 
 
 
 
 
 
 
 
 
 
 
 
2
 
3
- def process_image(image):
4
- # Simply return the image as is for display
5
- return image
 
6
 
7
- # Create the Gradio interface
8
- with gr.Blocks() as demo:
9
- gr.Markdown("# Image Uploader and Viewer")
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
10
 
11
- with gr.Row():
12
- with gr.Column():
13
- input_image = gr.Image(type="pil", label="Upload an image")
14
- upload_button = gr.Button("Display Image")
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
15
 
16
- with gr.Column():
17
- output_image = gr.Image(label="Displayed Image")
 
 
 
 
 
 
 
 
 
 
 
 
 
 
18
 
19
- upload_button.click(
20
- fn=process_image,
21
- inputs=input_image,
22
- outputs=output_image
23
- )
24
 
25
  # Launch the app
26
  if __name__ == "__main__":
27
- demo.launch()
 
 
1
  import gradio as gr
2
+ import numpy as np
3
+ from PIL import Image
4
+ import io
5
+ import os
6
+ import requests
7
+ import json
8
+ from dotenv import load_dotenv
9
+ import openai
10
+ import base64
11
+ import csv
12
+ import tempfile
13
+ import datetime
14
 
15
# Load environment variables from .env file if it exists (for local development).
# On Hugging Face Spaces, the secrets are automatically available as environment
# variables, so no .env file is present there and this block is skipped.
if os.path.exists(".env"):
    load_dotenv()
19
 
20
+ from io import BytesIO
21
+ import numpy as np
22
+ import requests
23
+ from PIL import Image
24
+
25
+ # import libraries
26
+ from library.utils_model import *
27
+ from library.utils_html import *
28
+ from library.utils_prompt import *
29
+
30
# API clients: `OR` talks to OpenRouter with the default base URL; `gemini`
# reuses the same wrapper class but points at Google's OpenAI-compatible
# Gemini endpoint with its own key.
# NOTE(review): analysis code further down calls `gpt.generate_caption`, but no
# `gpt` client is created anywhere in this file — confirm which client
# (presumably `OR`) was intended.
OR = OpenRouterAPI()
gemini = OpenRouterAPI(api_key = os.getenv("GEMINI_API_KEY"),base_url="https://generativelanguage.googleapis.com/v1beta/openai/")

# Path for storing user preferences (A/B test selections).
PREFERENCES_FILE = "data/user_preferences.csv"

# Ensure directory exists before the first append to the CSV.
os.makedirs(os.path.dirname(PREFERENCES_FILE), exist_ok=True)
38
+
39
def save_preference(image_path, model_a_text, model_b_text, preferred_model):
    """Append one user A/B preference record to PREFERENCES_FILE.

    Args:
        image_path: Path of the judged image; only its basename is stored.
        model_a_text: Alt-text produced by Model A.
        model_b_text: Alt-text produced by Model B.
        preferred_model: Which model the user picked (e.g. "Model A").

    Returns:
        True once the row has been written.
    """
    # Decide up front whether a header row is needed (first write only).
    write_header = not os.path.isfile(PREFERENCES_FILE)

    # Store just the filename, not the full (machine-specific) path.
    image_filename = os.path.basename(image_path)

    with open(PREFERENCES_FILE, 'a', newline='') as csv_file:
        csv_writer = csv.writer(csv_file)
        if write_header:
            csv_writer.writerow(
                ['timestamp', 'image', 'model_a_text', 'model_b_text', 'preferred_model'])
        csv_writer.writerow([
            datetime.datetime.now().strftime("%Y-%m-%d %H:%M:%S"),
            image_filename,
            model_a_text,
            model_b_text,
            preferred_model,
        ])

    return True
65
+
66
def create_csv_file(results):
    """Write analysis results to a temporary CSV file and return its path.

    Args:
        results: Iterable of dicts; missing keys default to "".

    Returns:
        Filesystem path of the newly created .csv temp file.
    """
    fd, csv_path = tempfile.mkstemp(suffix='.csv')

    # os.fdopen takes ownership of the descriptor, so the temp file is
    # flushed and closed when the with-block exits.
    with os.fdopen(fd, 'w', newline='') as handle:
        writer = csv.writer(handle)
        fields = ('image_id', 'model_a_content', 'model_b_content', 'preferred_model')
        writer.writerow(list(fields))
        for entry in results:
            writer.writerow([entry.get(field, '') for field in fields])

    return csv_path
85
+
86
# Extract original filename without path or extension
def get_base_filename(filepath):
    """Return the filename from *filepath* with directory and extension stripped.

    Empty/None input yields "" so callers can pass through missing paths safely.
    """
    if not filepath:
        return ""
    stem, _ext = os.path.splitext(os.path.basename(filepath))
    return stem
95
+
96
# Define the Gradio interface
def create_demo():
    """Build and return the Gradio Blocks app for alt-text generation.

    Two modes are supported:
      * single-model mode — one selected model captions each uploaded image;
      * A/B testing mode — two models caption every image and the user records
        which output they prefer (persisted via save_preference).

    Returns:
        gr.Blocks: the assembled (not yet launched) demo.
    """
    with gr.Blocks(theme=gr.themes.Monochrome()) as demo:
        gr.Markdown("# AI Alt-text Generator")
        gr.Markdown("Upload one or more images to generate Alt-text")
        gr.Markdown("Developed by the Natural History Museum in Partnership with National Museums Liverpool.")

        with gr.Row():
            # Left column: Controls and uploads
            with gr.Column(scale=1):
                # Upload interface
                upload_button = gr.UploadButton(
                    "Click to Upload Images",
                    file_types=["image"],
                    file_count="multiple"
                )

                # Add model selection dropdown (only meaningful in single-model mode)
                model_choice = gr.Dropdown(
                    choices=["GPT-4o", "Default"],
                    label="Select Model",
                    value="Default",
                    visible=True
                )

                # Add comparison mode checkbox
                comparison_mode = gr.Checkbox(
                    label="Enable A/B Testing Mode",
                    value=False,
                    info="Show outputs from both models and select preferred"
                )

                # Label the models in comparison mode
                with gr.Group(visible=False) as comparison_labels:
                    gr.Markdown("### Model A: GPT-4o")
                    gr.Markdown("### Model B: Default (GPT-4o-mini)")

                # Preview gallery for uploaded images
                gr.Markdown("### Uploaded Images")
                input_gallery = gr.Gallery(
                    label="",
                    columns=3,
                    height=150,
                    object_fit="contain"
                )

                # Analysis button
                analyze_button = gr.Button("Analyze Images", variant="primary", size="lg")

                # Hidden state component to store image info
                image_state = gr.State([])
                filename_state = gr.State([])

                # CSV download component
                csv_download = gr.File(label="CSV Results")

            # Right column: Display area
            with gr.Column(scale=2):
                # Image display
                current_image = gr.Image(
                    label="Current Image",
                    height=400,
                    type="filepath",
                    show_download_button=True,
                    show_share_button=True
                )

                # Navigation row
                with gr.Row():
                    prev_button = gr.Button("← Previous", size="sm")
                    image_counter = gr.Markdown("", elem_id="image-counter")
                    next_button = gr.Button("Next →", size="sm")

                # Standard single model output view
                with gr.Column(visible=True) as single_model_view:
                    # Alt-text heading
                    gr.Markdown("### Generated Alt-text")

                    # Alt-text
                    analysis_text = gr.Textbox(
                        label="",
                        value="Please analyze images to see results",
                        lines=6,
                        max_lines=10,
                        interactive=False,
                        show_label=False
                    )

                # Comparison view for A/B testing
                with gr.Column(visible=False) as comparison_view:
                    gr.Markdown("### Compare Generated Alt-text")

                    with gr.Row() as model_outputs:
                        # Model A output
                        with gr.Column():
                            gr.Markdown("#### Model A (GPT-4o)")
                            model_a_text = gr.Textbox(
                                label="",
                                value="",
                                lines=5,
                                max_lines=8,
                                interactive=False,
                                show_label=False
                            )
                            model_a_button = gr.Button("Select Model A", variant="secondary")

                        # Model B output
                        with gr.Column():
                            gr.Markdown("#### Model B (Default)")
                            model_b_text = gr.Textbox(
                                label="",
                                value="",
                                lines=5,
                                max_lines=8,
                                interactive=False,
                                show_label=False
                            )
                            model_b_button = gr.Button("Select Model B", variant="secondary")

                    # Preference saved notification
                    preference_status = gr.Markdown("")

        # Hidden state for gallery navigation and preferences
        current_index = gr.State(0)
        all_images = gr.State([])
        all_results = gr.State([])
        preference_state = gr.State([])  # To store user preferences

        # Toggle comparison mode
        # NOTE(review): this returns raw booleans keyed by component; toggling
        # *visibility* in Gradio normally requires gr.update(visible=...) —
        # confirm this behaves as intended on the installed Gradio version.
        def toggle_comparison_mode(enable_comparison):
            return {
                model_choice: not enable_comparison,
                single_model_view: not enable_comparison,
                comparison_view: enable_comparison,
                comparison_labels: enable_comparison
            }

        comparison_mode.change(
            fn=toggle_comparison_mode,
            inputs=[comparison_mode],
            outputs=[model_choice, single_model_view, comparison_view, comparison_labels]
        )

        # Handle file uploads - store files for use during analysis.
        # Returns the same path list twice: once for the preview gallery and
        # once for the hidden image_state used later by analyze_images.
        def handle_upload(files):
            file_paths = []
            file_names = []
            for file in files:
                file_paths.append(file.name)
                # Extract filename without path or extension for later use
                file_names.append(get_base_filename(file.name))
            return file_paths, file_paths, file_names

        upload_button.upload(
            fn=handle_upload,
            inputs=[upload_button],
            outputs=[input_gallery, image_state, filename_state]
        )

        # Function to analyze images.
        # NOTE(review): the captioning calls below use `gpt.generate_caption`,
        # but no `gpt` object is defined at module level (only `OR` and
        # `gemini` are created) — this will raise NameError at runtime;
        # confirm which client was intended.
        # NOTE(review): `base_prompt` is not defined in this file — presumably
        # provided by the star-import from library.utils_prompt; verify.
        def analyze_images(image_paths, model_choice, comparison_mode, filenames):
            # NOTE(review): this early return yields 10 values while the
            # analyze_button.click() wiring below lists 11 outputs — confirm
            # (the success-path return has 11).
            if not image_paths:
                return [], [], 0, "", "No images", "", "", "", [], ""

            image_results = []
            empty_preferences = [None] * len(image_paths)  # Initialize with no preferences

            for i, image_path in enumerate(image_paths):
                # Use original filename as image_id if available
                if i < len(filenames) and filenames[i]:
                    image_id = filenames[i]
                else:
                    image_id = f"Image {i+1}"

                try:
                    # Open the image file for analysis
                    img = Image.open(image_path)
                    prompt0 = base_prompt()

                    # In comparison mode, always generate both outputs
                    if comparison_mode:
                        # Generate Model A output (GPT-4o)
                        model_a_result = gpt.generate_caption(img, model="gpt-4o", prompt=prompt0)

                        # Generate Model B output (Default/GPT-4o-mini)
                        model_b_result = gpt.generate_caption(img, model="gpt-4o-mini", prompt=prompt0)

                        # Add to results
                        image_results.append({
                            "image_id": image_id,
                            "model_a_content": model_a_result,
                            "model_b_content": model_b_result,
                            "preferred_model": None  # No preference yet
                        })
                    else:
                        # Use the selected model
                        if model_choice == "GPT-4o":
                            result = gpt.generate_caption(img, model="gpt-4o", prompt=prompt0)
                        else:  # Default model
                            result = gpt.generate_caption(img, model="gpt-4o-mini", prompt=prompt0)

                        # For single mode, we still keep the structure compatible with comparison mode
                        image_results.append({
                            "image_id": image_id,
                            "model_a_content": result,
                            "model_b_content": "",
                            "preferred_model": None
                        })

                except Exception as e:
                    # Record the failure as the "caption" so the UI still shows something
                    error_message = f"Error: {str(e)}"
                    image_results.append({
                        "image_id": image_id,
                        "model_a_content": error_message,
                        "model_b_content": error_message if comparison_mode else "",
                        "preferred_model": None
                    })

            # Create a CSV file for download
            csv_path = create_csv_file(image_results)

            # Set up initial display with first image
            if len(image_paths) > 0:
                initial_image = image_paths[0]
                initial_counter = f"{1} of {len(image_paths)}"

                if comparison_mode:
                    initial_model_a = image_results[0]["model_a_content"]
                    initial_model_b = image_results[0]["model_b_content"]
                    initial_text = ""  # Not used in comparison mode
                else:
                    initial_text = image_results[0]["model_a_content"]
                    initial_model_a = ""  # Not used in single mode
                    initial_model_b = ""  # Not used in single mode
            else:
                # Unreachable in practice (empty list returns early above),
                # kept for safety.
                initial_image = ""
                initial_text = "No images analyzed"
                initial_model_a = ""
                initial_model_b = ""
                initial_counter = "0 of 0"

            return (image_paths, image_results, 0, initial_image, initial_counter,
                    initial_text, initial_model_a, initial_model_b, empty_preferences,
                    csv_path, "")

        # Function to navigate to previous image (wraps around at the ends).
        # NOTE(review): indexes `preferences[new_idx]` without a bounds check —
        # safe only while preference_state stays the same length as all_images.
        def go_to_prev(current_idx, images, results, comparison_mode, preferences):
            if not images or len(images) == 0:
                return current_idx, "", "0 of 0", "", "", "", ""

            new_idx = (current_idx - 1) % len(images) if current_idx > 0 else len(images) - 1
            counter_html = f"{new_idx + 1} of {len(images)}"

            # Get preference status for this image
            preference_message = ""
            if preferences[new_idx]:
                preferred = "Model A" if preferences[new_idx] == "A" else "Model B"
                preference_message = f"You selected {preferred} for this image"

            if comparison_mode:
                return (new_idx, images[new_idx], counter_html, "",
                        results[new_idx]["model_a_content"],
                        results[new_idx]["model_b_content"],
                        preference_message)
            else:
                return (new_idx, images[new_idx], counter_html,
                        results[new_idx]["model_a_content"], "", "", "")

        # Function to navigate to next image (wraps around at the ends).
        def go_to_next(current_idx, images, results, comparison_mode, preferences):
            if not images or len(images) == 0:
                return current_idx, "", "0 of 0", "", "", "", ""

            new_idx = (current_idx + 1) % len(images)
            counter_html = f"{new_idx + 1} of {len(images)}"

            # Get preference status for this image
            preference_message = ""
            if preferences[new_idx]:
                preferred = "Model A" if preferences[new_idx] == "A" else "Model B"
                preference_message = f"You selected {preferred} for this image"

            if comparison_mode:
                return (new_idx, images[new_idx], counter_html, "",
                        results[new_idx]["model_a_content"],
                        results[new_idx]["model_b_content"],
                        preference_message)
            else:
                return (new_idx, images[new_idx], counter_html,
                        results[new_idx]["model_a_content"], "", "", "")

        # Function to handle Model A selection: record it in state, persist it
        # to the preferences CSV, and confirm to the user.
        def select_model_a(current_idx, images, results, preferences):
            if not images or current_idx >= len(images):
                return preferences, "No image selected"

            # Create a copy of preferences to modify
            new_preferences = preferences.copy()

            # Update preference for current image
            new_preferences[current_idx] = "A"

            # Save preference to CSV
            image_path = images[current_idx]
            model_a_text = results[current_idx]["model_a_content"]
            model_b_text = results[current_idx]["model_b_content"]
            save_preference(image_path, model_a_text, model_b_text, "Model A")

            # Also update the results with the preference
            results[current_idx]["preferred_model"] = "A"

            # Create confirmation message
            message = f"✓ You selected Model A for this image"

            return new_preferences, message

        # Function to handle Model B selection (mirror of select_model_a).
        def select_model_b(current_idx, images, results, preferences):
            if not images or current_idx >= len(images):
                return preferences, "No image selected"

            # Create a copy of preferences to modify
            new_preferences = preferences.copy()

            # Update preference for current image
            new_preferences[current_idx] = "B"

            # Save preference to CSV
            image_path = images[current_idx]
            model_a_text = results[current_idx]["model_a_content"]
            model_b_text = results[current_idx]["model_b_content"]
            save_preference(image_path, model_a_text, model_b_text, "Model B")

            # Also update the results with the preference
            results[current_idx]["preferred_model"] = "B"

            # Create confirmation message
            message = f"✓ You selected Model B for this image"

            return new_preferences, message

        # Connect the analyze button
        analyze_button.click(
            fn=analyze_images,
            inputs=[image_state, model_choice, comparison_mode, filename_state],
            outputs=[
                all_images, all_results, current_index, current_image, image_counter,
                analysis_text, model_a_text, model_b_text, preference_state,
                csv_download, preference_status
            ]
        )

        # Connect navigation buttons for both modes
        prev_button.click(
            fn=go_to_prev,
            inputs=[current_index, all_images, all_results, comparison_mode, preference_state],
            outputs=[current_index, current_image, image_counter, analysis_text,
                     model_a_text, model_b_text, preference_status]
        )

        next_button.click(
            fn=go_to_next,
            inputs=[current_index, all_images, all_results, comparison_mode, preference_state],
            outputs=[current_index, current_image, image_counter, analysis_text,
                     model_a_text, model_b_text, preference_status]
        )

        # Connect model selection buttons with separate functions
        model_a_button.click(
            fn=select_model_a,
            inputs=[current_index, all_images, all_results, preference_state],
            outputs=[preference_state, preference_status]
        )

        model_b_button.click(
            fn=select_model_b,
            inputs=[current_index, all_images, all_results, preference_state],
            outputs=[preference_state, preference_status]
        )

        # Optional: Add additional information
        with gr.Accordion("About", open=False):
            gr.Markdown("""
            ## About this demo

            This demo generates alt-text for uploaded images.

            - Upload one or more images using the upload button
            - Choose between standard mode or A/B testing mode
            - In standard mode, select one model to generate alt-text
            - In A/B testing mode, compare outputs from two models and select your preference
            - Navigate through the images with the Previous and Next buttons
            - Download CSV with all results

            Developed by the Natural History Museum in Partnership with National Museums Liverpool.
            """)

    return demo
 
 
 
 
494
 
495
# Launch the app: build the Gradio UI and start the server (blocks until
# shutdown). Guarded so importing this module does not start a server.
if __name__ == "__main__":
    app = create_demo()
    app.launch()
library/.ipynb_checkpoints/utils_html-checkpoint.py ADDED
File without changes
library/.ipynb_checkpoints/utils_model-checkpoint.py ADDED
@@ -0,0 +1,298 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import base64
2
+ from openai import OpenAI
3
+ from PIL import Image
4
+ from io import BytesIO
5
+ import os
6
+
7
def resize_image(image_input, max_size=1024, preserve_aspect_ratio=True):
    """
    Resize an image so neither dimension exceeds max_size.

    Args:
        image_input: Either a PIL Image object or a string file path to an image
        max_size (int): Maximum width or height (whichever is larger)
        preserve_aspect_ratio (bool): Whether to preserve the aspect ratio

    Returns:
        PIL.Image: Resized image (the original object if it already fits)
    """
    # Accept either a file path or an already-loaded PIL Image.
    if isinstance(image_input, str):
        if not os.path.exists(image_input):
            raise FileNotFoundError(f"Image file not found: {image_input}")
        img = Image.open(image_input)
    else:
        img = image_input

    width, height = img.size

    # Nothing to do when the image already fits within the limit.
    if width <= max_size and height <= max_size:
        return img

    if not preserve_aspect_ratio:
        # Force a square of max_size x max_size.
        target = (max_size, max_size)
    elif width > height:
        # Landscape: cap the width, scale the height proportionally.
        target = (max_size, int(height * (max_size / width)))
    else:
        # Portrait/square: cap the height, scale the width proportionally.
        target = (int(width * (max_size / height)), max_size)

    return img.resize(target, Image.LANCZOS)
50
+
51
def encode_image(image_input, format="JPEG", max_size=None):
    """
    Convert an image to a base64 encoded string, with optional resizing.

    Args:
        image_input: Either a PIL Image object or a string file path to an image
        format: Image format for saving (default: "JPEG")
        max_size: Maximum size for the image (width or height). If None, no resizing is done.

    Returns:
        base64 encoded string of the image
    """
    if not isinstance(image_input, str):
        # PIL Image object: optionally shrink, then serialise to base64.
        img = resize_image(image_input, max_size=max_size) if max_size else image_input
        buffer = BytesIO()
        img.save(buffer, format=format)
        return base64.b64encode(buffer.getvalue()).decode("utf-8")

    # File-path input: validate before touching the file.
    if not os.path.exists(image_input):
        raise FileNotFoundError(f"Image file not found: {image_input}")

    if not max_size:
        # No resizing requested: stream the raw bytes straight through.
        with open(image_input, "rb") as image_file:
            return base64.b64encode(image_file.read()).decode("utf-8")

    # Load, shrink, and re-encode in the requested format.
    shrunk = resize_image(Image.open(image_input), max_size=max_size)
    buffer = BytesIO()
    shrunk.save(buffer, format=format)
    return base64.b64encode(buffer.getvalue()).decode("utf-8")
88
+
89
class OpenRouterAPI:
    """
    Thin client for OpenAI-compatible chat-completion APIs.

    Defaults to OpenRouter's endpoint, but `base_url` can point at any
    OpenAI-compatible service (the app also instantiates this against
    Google's Gemini compatibility endpoint).
    """
    def __init__(self, api_key=None,base_url="https://openrouter.ai/api/v1"):
        """
        Initialize the OpenRouter client

        Args:
            api_key (str, optional): OpenRouter API key. If None, will try to get from environment variable
            base_url (str, optional): OpenAI-compatible API base URL

        Raises:
            ValueError: if no key is supplied and OPENROUTER_API_KEY is unset
        """
        # Fall back to the environment so keys never need to be hard-coded.
        api_key = api_key or os.getenv("OPENROUTER_API_KEY")
        if not api_key:
            raise ValueError("OpenRouter API key not provided and not found in environment variables")

        self.client = OpenAI(
            api_key=api_key,
            base_url=base_url
        )

    def list_models(self):
        """
        List all available models on OpenRouter

        Returns:
            list: List of model IDs
        """
        models = self.client.models.list()
        model_ids = [model.id for model in models.data]
        return model_ids

    def generate_caption(self, image_path,
                         model="anthropic/claude-3-7-sonnet",
                         prompt_dev="",
                         prompt="Give a very brief description of this image.",
                         detail="high",
                         temperature=0.7,
                         max_image_size=1024):
        """
        Generate captions for an image using OpenRouter models

        Args:
            image_path (str): Path to the image file. Despite the name, a PIL
                Image object is also accepted (encode_image handles both).
            model (str): Model to use (e.g., 'anthropic/claude-3-7-sonnet', 'openai/gpt-4o')
            prompt_dev (str): System prompt or developer prompt
            prompt (str): Text prompt to guide caption generation
            detail (str): Level of detail for image analysis ('low', 'high', etc.) - only applies to OpenAI models
            temperature (float): Sampling temperature for generation
            max_image_size (int): Maximum dimension of the image before encoding. Set to None to disable resizing.

        Returns:
            str: Generated caption
        """
        # Getting the Base64 string with optional resizing
        base64_image = encode_image(image_path, max_size=max_image_size)

        # Prepare messages based on OpenRouter's format
        messages = []

        # Add system message if prompt_dev is provided
        if prompt_dev:
            messages.append({
                "role": "system",
                "content": prompt_dev
            })

        # Add user message with text and image
        content = [
            {
                "type": "text",
                "text": prompt,
            }
        ]

        # Add image with detail parameter only for OpenAI models
        if "openai" in model.lower():
            content.append({
                "type": "image_url",
                "image_url": {"url": f"data:image/jpeg;base64,{base64_image}", "detail": detail},
            })
        else:
            content.append({
                "type": "image_url",
                "image_url": {"url": f"data:image/jpeg;base64,{base64_image}"},
            })

        messages.append({
            "role": "user",
            "content": content,
        })

        response = self.client.chat.completions.create(
            model=model,
            messages=messages,
            temperature=temperature,
        )

        return response.choices[0].message.content

    def generate_text_response(self, text_prompt,
                               model="anthropic/claude-3-5-haiku",
                               prompt_dev="",
                               temperature=0.7):
        """
        Generate responses based on text input using OpenRouter models

        Args:
            text_prompt (str): The text to analyze or respond to
            model (str): Model to use (e.g., 'anthropic/claude-3-5-haiku', 'openai/gpt-4o-mini', 'google/gemini-pro')
            prompt_dev (str): System prompt or developer prompt
            temperature (float): Sampling temperature for generation

        Returns:
            str: Generated response
        """
        # Prepare messages based on OpenRouter's format
        messages = []

        # Add system message if prompt_dev is provided
        if prompt_dev:
            messages.append({
                "role": "system",
                "content": prompt_dev
            })

        # Add user message with text
        messages.append({
            "role": "user",
            "content": text_prompt
        })

        response = self.client.chat.completions.create(
            model=model,
            messages=messages,
            temperature=temperature,
        )

        return response.choices[0].message.content

    def classify_objs(self, image_path,
                      categories=["Painting/sketch", "Statue/Bust", "Clothing", "Porcelain/Ceramic tableware", "Text based Document", "Other"],
                      model="openai/gpt-4o-mini",
                      detail="low",
                      max_image_size=512): # Using smaller size for classification since less detail is needed
        """
        Classify objects in an image into predefined categories

        Args:
            image_path (str): Path to the image file
            categories (list): List of categories for classification. NOTE: a
                mutable default; safe only because the method never mutates it.
            model (str): Model to use for classification
            detail (str): Level of detail for image analysis ('low', 'high') - only applies to OpenAI models
            max_image_size (int): Maximum dimension for the image. Can be smaller for classification tasks.

        Returns:
            str: Classification result
        """
        prompt = f"This is an image of a museum object. Classify it into one of these categories: {categories}. Only classify it if you are confident it belongs in that category and the category represents the main portion of the image, otherwise return 'Other'. Respond with only the category name."
        return self.generate_caption(image_path, model=model, prompt=prompt, detail=detail, max_image_size=max_image_size)

    def estimate_cost(self, model, tokens_in=1000, tokens_out=200, image=False, detail="low"):
        """
        Estimate the cost of using a specific model based on input/output tokens

        Args:
            model (str): Model identifier
            tokens_in (int): Number of input tokens
            tokens_out (int): Number of output tokens
            image (bool): Whether the request includes an image
            detail (str): Image detail level ('low', 'high')

        Returns:
            dict: Cost estimate information
        """
        # This is a simplified approach - in a real implementation,
        # you might want to use OpenRouter's pricing API or maintain
        # a more complete pricing table

        # Simplified pricing mapping (in USD per 1M tokens)
        # These are example values - please update with actual OpenRouter pricing
        pricing = {
            "anthropic/claude-3-7-sonnet": {"input": 15.0, "output": 75.0},
            "anthropic/claude-3-5-haiku": {"input": 1.0, "output": 5.0},
            "openai/gpt-4o": {"input": 10.0, "output": 30.0},
            "openai/gpt-4o-mini": {"input": 0.2, "output": 0.6},
            "google/gemini-pro": {"input": 0.5, "output": 1.5},
        }

        # Default to a moderate pricing if model not found
        model_pricing = pricing.get(model, {"input": 5.0, "output": 15.0})

        # Image tokens estimation (rough example figures, not measured)
        image_tokens = 0
        if image:
            if detail == "low":
                image_tokens = 1200
            else: # high
                image_tokens = 4000

        # Calculate costs (pricing table is per 1M tokens)
        input_cost = (tokens_in + image_tokens) * model_pricing["input"] / 1000000
        output_cost = tokens_out * model_pricing["output"] / 1000000
        total_cost = input_cost + output_cost

        return {
            "model": model,
            "input_tokens": tokens_in + image_tokens,
            "output_tokens": tokens_out,
            "input_cost": input_cost,
            "output_cost": output_cost,
            "total_cost": total_cost
        }
+ }
298
+
library/.ipynb_checkpoints/utils_prompt-checkpoint.py ADDED
@@ -0,0 +1,76 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
def prompt_new(title=None):
    """Build a short framing sentence for a museum-object prompt.

    Args:
        title (str, optional): Object title; rendered as "(titled: ...)" when given.

    Returns:
        str: One-sentence prompt preamble.
    """
    # Bug fix: the original assigned an empty dict ({}) when no title was
    # given, which the f-string rendered literally as "{}" in the output.
    if title is None:
        return "This object is part of a museum collection."
    return f"This object (titled: {title}) is part of a museum collection."
8
def prompt_1(title=None):
    """Build a concise alt-text generation prompt for a museum image.

    Args:
        title (str, optional): Image title; rendered as "(titled: ...)" when given.

    Returns:
        str: Full prompt instructing a short, non-interpretive object description.
    """
    # Bug fix: the original assigned an empty dict ({}) when no title was
    # given, which the f-string rendered literally as "{}" in the output.
    # A trailing space is kept inside title_info so spacing stays correct
    # in both cases.
    title_info = "" if title is None else f"(titled: {title}) "
    return (
        f"The image {title_info}is part of a museum collection. "
        "Please give a short description of the museum object that will be used as alt-text. "
        "Do not discuss historical context other than what is included in the image. "
        "Don't mention the background, simply focus on describing the object in the image itself "
        "and try to avoid artistic interpretations and evaluative descriptions."
    )
15
def context_prompt():
    """Return an add-on prompt instructing the model to append an
    'Additional context:' paragraph after the main alt-text.

    The returned text specifies the expected output format (main text, blank
    line, then the context paragraph) and character budgets: max 300
    characters for the main text, max 120 for the additional context.

    Returns:
        str: The instruction text (leading/trailing whitespace from the
        triple-quoted literal is preserved).
    """

    prompt = """
    Additionally add a paragraph below starting with 'Additional context:' where you mention the broader context of the image. Mention additional information that will be useful when viewing this image considering race, sexuality, gender and class where relevant. This is a museum artifact and the description will be used for educational purposes.
    So it should have this format:
    main text

    Additional context: additional text

    The main text should be a maximum of 300 characters and the additional context should be a maximum of 120.
    """
    return prompt
+
28
+
29
def generate_prompt(title, category='Other', max_characters=450):
    """Assemble an alt-text generation prompt tailored to a museum object category.

    Args:
        title (str): Object title, embedded into the prompt.
        category (str): One of 'Clothing', 'Statue/Bust', 'Painting/sketch',
            'Porcelain/Ceramic tableware', 'Text based document'; anything else
            falls through to a generic prompt.
        max_characters (int): Character budget stated in the prompt preamble.

    Returns:
        str: The full prompt string (preamble + category-specific body).
    """
    pre_prompt = f"Keep language simple, plain and informative and limit responses to a maximum of {max_characters} characters. "

    def main_prompt(image='image', title=''):
        # Shared opening sentence; `image` is the noun used for the object.
        return f"This {image} (titled: {title}) is part of a museum collection. Please give a concise description of the {image} that will be used as alt-text for accessibility purposes. "

    # Reusable instruction fragments appended per category.
    extra_prompt = "Do not explicitly state the title in the description. Do not discuss historical context other than what is included in the image. Avoid concluding statements. "
    extra_prompt2 = "Avoid artistic interpretations and evaluative descriptions. "
    background_prompt = "Do not mention the background or setting, simply focus on describing the item itself. "

    if category == 'Clothing':
        prompt = main_prompt(image='garment', title=title)
        prompt += "Provide a concise, factual description of the garment, including its type, material, color, shape, notable design features, and any visible embellishments. "
        prompt += extra_prompt
        prompt += extra_prompt2

    elif category == 'Statue/Bust':
        prompt = main_prompt(image='sculpture', title=title)
        prompt += extra_prompt
        prompt += extra_prompt2
        prompt += background_prompt

    elif category == 'Painting/sketch':
        prompt = main_prompt(image='artwork', title=title)
        prompt += extra_prompt
        prompt += "Focus on providing a description of the artwork including its content and also briefly its style. "
        prompt += extra_prompt2

    elif category == 'Porcelain/Ceramic tableware':
        prompt = main_prompt(image='tablewear', title=title)
        prompt += "Describe its type (e.g., plate, bowl, teacup) and notable elements of it's appearance. "
        prompt += extra_prompt
        prompt += extra_prompt2
        prompt += background_prompt

    elif category == 'Text based document':
        prompt = main_prompt(image='image', title=title)
        # Bug fix: this line previously used `prompt =`, discarding the
        # main_prompt opening sentence; append instead.
        prompt += "If the text is long do not include the whole text but summarise it. "
        prompt += extra_prompt
        prompt += extra_prompt2
    else:
        # Generic fallback for uncategorised objects.
        prompt = f"This image is titled: {title} and is part of a museum collection. Please give a concise description of the museum object that will be used as alt-text. Do not discuss historical context other than what is included in the image. Don't mention the background, simply focus on describing the object in the image itself and try to avoid artistic interpretations and evaluative descriptions."

    return pre_prompt + prompt
library/__pycache__/utils.cpython-311.pyc ADDED
Binary file (7.82 kB). View file
 
library/__pycache__/utils_html.cpython-311.pyc ADDED
Binary file (32.9 kB). View file
 
library/__pycache__/utils_model.cpython-311.pyc ADDED
Binary file (11.6 kB). View file
 
library/__pycache__/utils_prompt.cpython-311.pyc ADDED
Binary file (4.77 kB). View file
 
library/utils_html.py ADDED
@@ -0,0 +1,840 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
def generate_slides_html(image_paths, image_ids, desc1, desc2, desc3, output_file='gallery_with_descriptions.html'):
    """Write a standalone HTML slideshow comparing three model descriptions per image.

    One slide is built per image, showing the image, its ID, and the three
    descriptions. Navigation (buttons + arrow keys) picks a *random* unseen
    slide for "Next" and walks back through visit history for "Previous".

    Args:
        image_paths (list[str]): Paths/URLs used as <img src> values.
        image_ids (list): Identifiers shown as each slide's title; parallel
            to image_paths.
        desc1, desc2, desc3 (list[str]): Per-image descriptions from three
            models, parallel to image_paths; newlines become <br>.
        output_file (str): Destination HTML file.

    Returns:
        None. Side effect: writes output_file and prints a confirmation.

    NOTE(review): descriptions and IDs are interpolated into the HTML without
    escaping, so markup characters in them will be rendered as HTML.
    """
    # Start of HTML content
    html_content = '''<!DOCTYPE html>
    <html lang="en">
    <head>
        <meta charset="UTF-8">
        <meta name="viewport" content="width=device-width, initial-scale=1.0">
        <title>Image Gallery</title>
        <style>
            body {
                font-family: Arial, sans-serif;
                max-width: 900px;
                margin: 0 auto;
                padding: 20px;
                background: #f5f5f5;
            }
            .gallery-container {
                position: relative;
                background: white;
                border-radius: 8px;
                padding: 30px;
                margin-bottom: 30px;
                box-shadow: 0 2px 4px rgba(0,0,0,0.1);
                min-height: 700px;
            }
            .gallery-container img {
                max-width: 700px;
                max-height: 500px;
                height: auto;
                border-radius: 4px;
                display: block;
                margin: 0 auto 20px;
            }
            .slide {
                display: none;
            }
            .slide.active {
                display: block;
                animation: fadeIn 0.5s;
            }
            @keyframes fadeIn {
                from { opacity: 0; }
                to { opacity: 1; }
            }
            .nav-buttons {
                display: flex;
                justify-content: space-between;
                margin: 20px 0;
            }
            .nav-button {
                padding: 10px 20px;
                background: #007bff;
                color: white;
                border: none;
                border-radius: 4px;
                cursor: pointer;
                font-size: 16px;
            }
            .nav-button:disabled {
                background: #cccccc;
                cursor: not-allowed;
            }
            .image-counter {
                text-align: center;
                font-weight: bold;
                margin-bottom: 20px;
                color: #555;
            }
            .description {
                background: #f8f8f8;
                padding: 15px;
                margin: 15px 0;
                border-radius: 4px;
                white-space: pre-line;
            }
            .image-title {
                font-size: 1.5em;
                font-weight: bold;
                margin-bottom: 15px;
                color: #333;
                text-align: center;
            }
            .model-title {
                font-weight: bold;
                color: #666;
                margin-bottom: 5px;
            }
            .random-button {
                padding: 10px 20px;
                background: #28a745;
                color: white;
                border: none;
                border-radius: 4px;
                cursor: pointer;
                font-size: 16px;
            }
            .random-button:hover {
                background: #218838;
            }
        </style>
    </head>
    <body>
        <div class="gallery-container" id="gallery">
            <div class="image-counter">Image <span id="current-index">1</span> of <span id="total-images">0</span></div>

            <!-- Slides will be generated here -->
    '''

    # Generate content for each image
    for i in range(len(image_paths)):
        # Process descriptions to handle line breaks
        desc1_html = desc1[i].replace('\n', '<br>')
        desc2_html = desc2[i].replace('\n', '<br>')
        desc3_html = desc3[i].replace('\n', '<br>')

        # One hidden .slide div per image; the JS below toggles .active.
        html_content += f'''
        <div class="slide" data-image-id="{image_ids[i]}" id="slide-{i}">
            <div class="image-title">Image ID: {image_ids[i]}</div>
            <img src="{image_paths[i]}" alt="Image {image_ids[i]}">

            <div class="description">
                <div class="model-title">Model 1</div>
                {desc1_html}
            </div>

            <div class="description">
                <div class="model-title">Model 2</div>
                {desc2_html}
            </div>

            <div class="description">
                <div class="model-title">Model 3</div>
                {desc3_html}
            </div>
        </div>
    '''

    # Closing markup plus the navigation script (plain string, no f-string).
    html_content += '''
        <div class="nav-buttons">
            <button id="prev-button" class="nav-button">Previous</button>
            <button id="next-button" class="nav-button">Next</button>
        </div>
    </div>

    <script>
        // Variables to track current slide and history
        let currentSlide = 0;
        const slides = document.querySelectorAll('.slide');
        const totalSlides = slides.length;
        const viewedSlides = new Set([0]); // Track which slides have been viewed
        const slideHistory = [0]; // Track navigation history
        let historyPosition = 0; // Current position in history

        // Update total images counter
        document.getElementById('total-images').textContent = totalSlides;

        // Function to show a specific slide
        function goToSlide(index) {
            // Hide all slides
            slides.forEach(slide => {
                slide.classList.remove('active');
            });

            // Show the selected slide
            slides[index].classList.add('active');
            currentSlide = index;

            // Add to viewed slides
            viewedSlides.add(index);

            // Update the counter
            document.getElementById('current-index').textContent = index + 1;

            // Update button states
            document.getElementById('prev-button').disabled = slideHistory.length <= 1;
        }

        // Function to go to a random slide and track in history
        function goToRandomSlide() {
            // Get array of unviewed slide indices
            const unviewedSlides = Array.from(Array(totalSlides).keys())
                .filter(index => !viewedSlides.has(index) && index !== currentSlide);

            // If we've seen all slides except the current one, reset
            if (unviewedSlides.length === 0) {
                viewedSlides.clear();
                // Don't add current slide to viewed set so we don't repeat it immediately

                // Recalculate unviewed slides (now all except current)
                const allSlides = Array.from(Array(totalSlides).keys())
                    .filter(index => index !== currentSlide);

                // Select a random slide from all slides except current
                const randomIndex = Math.floor(Math.random() * allSlides.length);
                const newSlideIndex = allSlides[randomIndex];

                // Add to history and update position
                slideHistory.push(newSlideIndex);
                historyPosition = slideHistory.length - 1;

                goToSlide(newSlideIndex);
            } else {
                // Select a random unviewed slide
                const randomIndex = Math.floor(Math.random() * unviewedSlides.length);
                const newSlideIndex = unviewedSlides[randomIndex];

                // Add to history and update position
                slideHistory.push(newSlideIndex);
                historyPosition = slideHistory.length - 1;

                goToSlide(newSlideIndex);
            }
        }

        // Function to go to previous slide in history
        function goToPreviousSlide() {
            if (slideHistory.length > 1 && historyPosition > 0) {
                historyPosition--;
                goToSlide(slideHistory[historyPosition]);
            }
        }

        // Function for next slide (completely random, no repeats until all seen)
        function goToNextSlide() {
            goToRandomSlide();
        }

        // Function for previous slide (removed history navigation)
        function goToPrevSlide() {
            // No longer tracking history - just go to a random slide
            goToRandomSlide();
        }

        // Initialize the first slide
        goToSlide(0);

        // Event listeners for navigation buttons
        document.getElementById('next-button').addEventListener('click', goToRandomSlide);
        document.getElementById('prev-button').addEventListener('click', goToPreviousSlide);

        // Add keyboard navigation
        document.addEventListener('keydown', (e) => {
            if (e.key === 'ArrowRight' || e.key === ' ' || e.key === 'Enter') {
                goToRandomSlide(); // Right arrow, space, or enter goes to next random
            } else if (e.key === 'ArrowLeft') {
                goToPreviousSlide(); // Left arrow goes to previous
            }
        });

        // Initialize - disable previous button at start
        document.getElementById('prev-button').disabled = true;
    </script>
    </body>
    </html>
    '''

    with open(output_file, 'w', encoding='utf-8') as f:
        f.write(html_content)

    print(f"Gallery with three model outputs has been generated as {output_file}")
262
+
263
+
264
def generate_rating_html(image_paths, image_ids, desc1, desc2, desc3, desc4, desc5, output_file='gallery_with_ratings.html'):
    """Write a standalone HTML rating form for five model descriptions per image.

    The generated page shuffles the image order client-side and, for each
    image, shuffles the display order of the five model descriptions; each
    description gets a 1-5 radio rating. A "Save Ratings as CSV" button
    (enabled once a labeller ID is entered) downloads
    ``ratings_<labeller_id>.csv`` with columns model,image_id,rating.

    Args:
        image_paths (list[str]): Paths/URLs used as <img src> values.
        image_ids (list): Identifiers shown with each image; parallel to
            image_paths.
        desc1..desc5 (list[str]): Per-image descriptions from five models,
            parallel to image_paths; newlines become <br>.
        output_file (str): Destination HTML file.

    Returns:
        None. Side effect: writes output_file and prints a confirmation.

    NOTE(review): descriptions are embedded into JS string literals with only
    double-quote escaping; embedded backslashes or backticks may break the
    generated script.
    """
    # Start of HTML content with linebreaks
    html_content = '''<!DOCTYPE html>
    <html lang="en">
    <head>
        <meta charset="UTF-8">
        <meta name="viewport" content="width=device-width, initial-scale=1.0">
        <title>Image Gallery with Ratings</title>
        <style>
            body {
                font-family: Arial, sans-serif;
                max-width: 800px;
                margin: 0 auto;
                padding: 20px;
                background: #f5f5f5;
            }
            .image-container {
                background: white;
                border-radius: 8px;
                padding: 20px;
                margin-bottom: 30px;
                box-shadow: 0 2px 4px rgba(0,0,0,0.1);
            }
            .image-container img {
                max-width: 500px;
                height: auto;
                border-radius: 4px;
                display: block;
                margin: 0 auto;
            }
            .description {
                background: #f8f8f8;
                padding: 15px;
                margin: 10px 0;
                border-radius: 4px;
                white-space: pre-line; /* This helps preserve line breaks */
            }
            .image-title {
                font-size: 1.5em;
                font-weight: bold;
                margin-bottom: 15px;
                color: #333;
                text-align: center;
            }
            .model-title {
                font-weight: bold;
                color: #666;
                margin-bottom: 5px;
            }
            .rating {
                display: flex;
                align-items: center;
                margin-top: 10px;
                padding: 10px;
                background: #fff;
                border-radius: 4px;
            }
            .rating-label {
                margin-right: 10px;
                font-weight: bold;
            }
            .rating-group {
                display: flex;
                gap: 10px;
            }
            .rating-radio {
                display: none;
            }
            .rating-button {
                padding: 8px 12px;
                border: 1px solid #ccc;
                border-radius: 4px;
                cursor: pointer;
                transition: all 0.2s;
            }
            .rating-radio:checked + .rating-button {
                background: #007bff;
                color: white;
                border-color: #0056b3;
            }
            .save-button {
                position: fixed;
                bottom: 20px;
                right: 20px;
                padding: 10px 20px;
                background: #007bff;
                color: white;
                border: none;
                border-radius: 4px;
                cursor: pointer;
            }
            .save-button:disabled {
                background: #cccccc;
                cursor: not-allowed;
            }
            #images-container {
                /* Container for all image blocks */
            }
        </style>
    </head>
    <body>
        <div id="rater-name-container">
            <label for="rater-name" style="font-weight: bold;">Labeller id:</label>
            <input type="text" id="rater-name" style="margin: 10px 0; padding: 5px; width: 200px;">
        </div>

        <div id="images-container">
            <!-- Image containers will be inserted here dynamically -->
        </div>
    '''

    # Create JavaScript arrays for each piece of data
    js_image_paths = []
    js_image_ids = []
    js_desc1 = []
    js_desc2 = []
    js_desc3 = []
    js_desc4 = []
    js_desc5 = []

    for i in range(len(image_paths)):
        js_image_paths.append(f'"{image_paths[i]}"')
        js_image_ids.append(f'"{image_ids[i]}"')

        # Process descriptions to properly handle line breaks - replace \n with <br> for HTML rendering
        desc1_html = desc1[i].replace('"', '\\"').replace('\n', '<br>')
        desc2_html = desc2[i].replace('"', '\\"').replace('\n', '<br>')
        desc3_html = desc3[i].replace('"', '\\"').replace('\n', '<br>')
        desc4_html = desc4[i].replace('"', '\\"').replace('\n', '<br>')
        desc5_html = desc5[i].replace('"', '\\"').replace('\n', '<br>')

        js_desc1.append(f'"{desc1_html}"')
        js_desc2.append(f'"{desc2_html}"')
        js_desc3.append(f'"{desc3_html}"')
        js_desc4.append(f'"{desc4_html}"')
        js_desc5.append(f'"{desc5_html}"')

    # Add JavaScript to handle randomization and image display
    # (f-string: JS braces are doubled to escape them).
    html_content += f'''
        <button onclick="saveRatings()" class="save-button" id="save-button">Save Ratings as CSV</button>

        <script>
            // Store all image data as separate arrays
            const imagePaths = [{', '.join(js_image_paths)}];
            const imageIds = [{', '.join(js_image_ids)}];
            const desc1 = [{', '.join(js_desc1)}];
            const desc2 = [{', '.join(js_desc2)}];
            const desc3 = [{', '.join(js_desc3)}];
            const desc4 = [{', '.join(js_desc4)}];
            const desc5 = [{', '.join(js_desc5)}];

            // Create an array of indices to shuffle for images
            let indices = [];
            for (let i = 0; i < imageIds.length; i++) {{
                indices.push(i);
            }}

            // Function to render all images in randomized order with randomized model order
            function renderImages() {{
                const container = document.getElementById('images-container');

                // Shuffle the indices (this randomizes image order)
                shuffleArray(indices);

                // Clear the container
                container.innerHTML = '';

                // Add each image in shuffled order
                indices.forEach((originalIndex, newIndex) => {{
                    // For each image, we'll create a different random order for the models
                    let modelOrder = [1, 2, 3, 4, 5];
                    shuffleArray(modelOrder);

                    // Start building the image container HTML
                    let imageHtml = `
                    <div class="image-container" data-image-id="${{imageIds[originalIndex]}}">
                        <div class="image-title">Image ID: ${{imageIds[originalIndex]}}</div>
                        <img src="${{imagePaths[originalIndex]}}" alt="Image ${{imageIds[originalIndex]}}">
                    `;

                    // Add descriptions and rating UI for each model in the randomized order
                    modelOrder.forEach((modelNum, modelIndex) => {{
                        // Get the description data for this model
                        let descData;
                        if (modelNum === 1) descData = desc1[originalIndex];
                        else if (modelNum === 2) descData = desc2[originalIndex];
                        else if (modelNum === 3) descData = desc3[originalIndex];
                        else if (modelNum === 4) descData = desc4[originalIndex];
                        else if (modelNum === 5) descData = desc5[originalIndex];

                        // Create HTML for this model's description and rating
                        imageHtml += `
                        <div class="description">
                            <div class="model-title">Model ${{modelNum}}</div>
                            ${{descData}}
                            <div class="rating" data-model="${{modelNum}}">
                                <span class="rating-label">Rating:</span>
                                <div class="rating-group">
                                    <input type="radio" name="rating-${{newIndex}}-${{modelNum}}" value="1" class="rating-radio" data-image="${{imageIds[originalIndex]}}" data-model="${{modelNum}}" id="rating-${{newIndex}}-${{modelNum}}-1">
                                    <label class="rating-button" for="rating-${{newIndex}}-${{modelNum}}-1">1</label>
                                    <input type="radio" name="rating-${{newIndex}}-${{modelNum}}" value="2" class="rating-radio" data-image="${{imageIds[originalIndex]}}" data-model="${{modelNum}}" id="rating-${{newIndex}}-${{modelNum}}-2">
                                    <label class="rating-button" for="rating-${{newIndex}}-${{modelNum}}-2">2</label>
                                    <input type="radio" name="rating-${{newIndex}}-${{modelNum}}" value="3" class="rating-radio" data-image="${{imageIds[originalIndex]}}" data-model="${{modelNum}}" id="rating-${{newIndex}}-${{modelNum}}-3">
                                    <label class="rating-button" for="rating-${{newIndex}}-${{modelNum}}-3">3</label>
                                    <input type="radio" name="rating-${{newIndex}}-${{modelNum}}" value="4" class="rating-radio" data-image="${{imageIds[originalIndex]}}" data-model="${{modelNum}}" id="rating-${{newIndex}}-${{modelNum}}-4">
                                    <label class="rating-button" for="rating-${{newIndex}}-${{modelNum}}-4">4</label>
                                    <input type="radio" name="rating-${{newIndex}}-${{modelNum}}" value="5" class="rating-radio" data-image="${{imageIds[originalIndex]}}" data-model="${{modelNum}}" id="rating-${{newIndex}}-${{modelNum}}-5">
                                    <label class="rating-button" for="rating-${{newIndex}}-${{modelNum}}-5">5</label>
                                </div>
                            </div>
                        </div>`;
                    }});

                    // Close the image container div
                    imageHtml += `</div>`;

                    // Add the complete HTML for this image to the page
                    container.innerHTML += imageHtml;
                }});
            }}

            // Fisher-Yates shuffle algorithm
            function shuffleArray(array) {{
                for (let i = array.length - 1; i > 0; i--) {{
                    const j = Math.floor(Math.random() * (i + 1));
                    [array[i], array[j]] = [array[j], array[i]];
                }}
                return array;
            }}

            function saveRatings() {{
                const labellerId = document.getElementById('rater-name').value.trim();

                if (!labellerId) {{
                    alert('Please enter a Labeller ID before saving');
                    return;
                }}

                const ratings = [];

                // Collect all ratings
                document.querySelectorAll('.rating-radio:checked').forEach(radio => {{
                    ratings.push({{
                        model: radio.dataset.model,
                        image_id: radio.dataset.image,
                        rating: radio.value
                    }});
                }});

                // Convert to CSV
                const headers = ['model', 'image_id', 'rating'];
                const csvContent = [
                    headers.join(','),
                    ...ratings.map(row => [
                        row.model,
                        row.image_id,
                        row.rating
                    ].join(','))
                ].join('\\n');

                // Create and trigger download with labeller ID in filename
                const blob = new Blob([csvContent], {{ type: 'text/csv;charset=utf-8;' }});
                const link = document.createElement('a');
                link.href = URL.createObjectURL(blob);
                link.download = `ratings_${{labellerId}}.csv`;
                link.click();
            }}

            // Add event listener to enable/disable save button based on labeller ID
            document.getElementById('rater-name').addEventListener('input', function() {{
                const saveButton = document.getElementById('save-button');
                saveButton.disabled = !this.value.trim();
            }});

            // Initially disable save button
            document.getElementById('save-button').disabled = true;

            // Render images when the page loads
            document.addEventListener('DOMContentLoaded', function() {{
                renderImages();
            }});
        </script>
    </body>
    </html>
    '''

    with open(output_file, 'w', encoding='utf-8') as f:
        f.write(html_content)

    print(f"Rating form with 5 models has been generated as {output_file}")
554
+
555
+
556
def generate_rating_html4(image_paths, image_ids, desc1, desc2, desc3, desc4, output_file='gallery_with_ratings.html'):
    """Write a standalone HTML rating form for four model descriptions per image.

    Four-model variant of ``generate_rating_html``: the page shuffles image
    order client-side and, per image, shuffles the display order of the four
    model descriptions; each description gets a 1-5 radio rating. A
    "Save Ratings as CSV" button (enabled once a labeller ID is entered)
    downloads ``ratings_<labeller_id>.csv`` with columns model,image_id,rating.

    Args:
        image_paths (list[str]): Paths/URLs used as <img src> values.
        image_ids (list): Identifiers shown with each image; parallel to
            image_paths.
        desc1..desc4 (list[str]): Per-image descriptions from four models,
            parallel to image_paths; newlines become <br>.
        output_file (str): Destination HTML file.

    Returns:
        None. Side effect: writes output_file and prints a confirmation.

    NOTE(review): descriptions are embedded into JS string literals with only
    double-quote escaping; embedded backslashes or backticks may break the
    generated script.
    """
    # Start of HTML content
    html_content = '''<!DOCTYPE html>
    <html lang="en">
    <head>
        <meta charset="UTF-8">
        <meta name="viewport" content="width=device-width, initial-scale=1.0">
        <title>Image Gallery with Ratings</title>
        <style>
            body {
                font-family: Arial, sans-serif;
                max-width: 800px;
                margin: 0 auto;
                padding: 20px;
                background: #f5f5f5;
            }
            .image-container {
                background: white;
                border-radius: 8px;
                padding: 20px;
                margin-bottom: 30px;
                box-shadow: 0 2px 4px rgba(0,0,0,0.1);
            }
            .image-container img {
                max-width: 500px;
                height: auto;
                border-radius: 4px;
                display: block;
                margin: 0 auto;
            }
            .description {
                background: #f8f8f8;
                padding: 15px;
                margin: 10px 0;
                border-radius: 4px;
                white-space: pre-line; /* This helps preserve line breaks */
            }
            .image-title {
                font-size: 1.5em;
                font-weight: bold;
                margin-bottom: 15px;
                color: #333;
                text-align: center;
            }
            .model-title {
                font-weight: bold;
                color: #666;
                margin-bottom: 5px;
            }
            .rating {
                display: flex;
                align-items: center;
                margin-top: 10px;
                padding: 10px;
                background: #fff;
                border-radius: 4px;
            }
            .rating-label {
                margin-right: 10px;
                font-weight: bold;
            }
            .rating-group {
                display: flex;
                gap: 10px;
            }
            .rating-radio {
                display: none;
            }
            .rating-button {
                padding: 8px 12px;
                border: 1px solid #ccc;
                border-radius: 4px;
                cursor: pointer;
                transition: all 0.2s;
            }
            .rating-radio:checked + .rating-button {
                background: #007bff;
                color: white;
                border-color: #0056b3;
            }
            .save-button {
                position: fixed;
                bottom: 20px;
                right: 20px;
                padding: 10px 20px;
                background: #007bff;
                color: white;
                border: none;
                border-radius: 4px;
                cursor: pointer;
            }
            .save-button:disabled {
                background: #cccccc;
                cursor: not-allowed;
            }
            #images-container {
                /* Container for all image blocks */
            }
        </style>
    </head>
    <body>
        <div id="rater-name-container">
            <label for="rater-name" style="font-weight: bold;">Labeller id:</label>
            <input type="text" id="rater-name" style="margin: 10px 0; padding: 5px; width: 200px;">
        </div>

        <div id="images-container">
            <!-- Image containers will be inserted here dynamically -->
        </div>
    '''

    # Create JavaScript arrays for each piece of data
    js_image_paths = []
    js_image_ids = []
    js_desc1 = []
    js_desc2 = []
    js_desc3 = []
    js_desc4 = []

    for i in range(len(image_paths)):
        js_image_paths.append(f'"{image_paths[i]}"')
        js_image_ids.append(f'"{image_ids[i]}"')

        # Process descriptions to properly handle line breaks - replace \n with <br> for HTML rendering
        desc1_html = desc1[i].replace('"', '\\"').replace('\n', '<br>')
        desc2_html = desc2[i].replace('"', '\\"').replace('\n', '<br>')
        desc3_html = desc3[i].replace('"', '\\"').replace('\n', '<br>')
        desc4_html = desc4[i].replace('"', '\\"').replace('\n', '<br>')

        js_desc1.append(f'"{desc1_html}"')
        js_desc2.append(f'"{desc2_html}"')
        js_desc3.append(f'"{desc3_html}"')
        js_desc4.append(f'"{desc4_html}"')

    # Add JavaScript to handle randomization and image display
    # (f-string: JS braces are doubled to escape them).
    html_content += f'''
        <button onclick="saveRatings()" class="save-button" id="save-button">Save Ratings as CSV</button>

        <script>
            // Store all image data as separate arrays
            const imagePaths = [{', '.join(js_image_paths)}];
            const imageIds = [{', '.join(js_image_ids)}];
            const desc1 = [{', '.join(js_desc1)}];
            const desc2 = [{', '.join(js_desc2)}];
            const desc3 = [{', '.join(js_desc3)}];
            const desc4 = [{', '.join(js_desc4)}];

            // Create an array of indices to shuffle for images
            let indices = [];
            for (let i = 0; i < imageIds.length; i++) {{
                indices.push(i);
            }}

            // Function to render all images in randomized order with randomized model order
            function renderImages() {{
                const container = document.getElementById('images-container');

                // Shuffle the indices (this randomizes image order)
                shuffleArray(indices);

                // Clear the container
                container.innerHTML = '';

                // Add each image in shuffled order
                indices.forEach((originalIndex, newIndex) => {{
                    // For each image, we'll create a different random order for the models
                    let modelOrder = [1, 2, 3, 4];
                    shuffleArray(modelOrder);

                    // Start building the image container HTML
                    let imageHtml = `
                    <div class="image-container" data-image-id="${{imageIds[originalIndex]}}">
                        <div class="image-title">Image ID: ${{imageIds[originalIndex]}}</div>
                        <img src="${{imagePaths[originalIndex]}}" alt="Image ${{imageIds[originalIndex]}}">
                    `;

                    // Add descriptions and rating UI for each model in the randomized order
                    modelOrder.forEach((modelNum, modelIndex) => {{
                        // Get the description data for this model
                        let descData;
                        if (modelNum === 1) descData = desc1[originalIndex];
                        else if (modelNum === 2) descData = desc2[originalIndex];
                        else if (modelNum === 3) descData = desc3[originalIndex];
                        else if (modelNum === 4) descData = desc4[originalIndex];

                        // Create HTML for this model's description and rating
                        imageHtml += `
                        <div class="description">
                            <div class="model-title">Model ${{modelNum}}</div>
                            ${{descData}}
                            <div class="rating" data-model="${{modelNum}}">
                                <span class="rating-label">Rating:</span>
                                <div class="rating-group">
                                    <input type="radio" name="rating-${{newIndex}}-${{modelNum}}" value="1" class="rating-radio" data-image="${{imageIds[originalIndex]}}" data-model="${{modelNum}}" id="rating-${{newIndex}}-${{modelNum}}-1">
                                    <label class="rating-button" for="rating-${{newIndex}}-${{modelNum}}-1">1</label>
                                    <input type="radio" name="rating-${{newIndex}}-${{modelNum}}" value="2" class="rating-radio" data-image="${{imageIds[originalIndex]}}" data-model="${{modelNum}}" id="rating-${{newIndex}}-${{modelNum}}-2">
                                    <label class="rating-button" for="rating-${{newIndex}}-${{modelNum}}-2">2</label>
                                    <input type="radio" name="rating-${{newIndex}}-${{modelNum}}" value="3" class="rating-radio" data-image="${{imageIds[originalIndex]}}" data-model="${{modelNum}}" id="rating-${{newIndex}}-${{modelNum}}-3">
                                    <label class="rating-button" for="rating-${{newIndex}}-${{modelNum}}-3">3</label>
                                    <input type="radio" name="rating-${{newIndex}}-${{modelNum}}" value="4" class="rating-radio" data-image="${{imageIds[originalIndex]}}" data-model="${{modelNum}}" id="rating-${{newIndex}}-${{modelNum}}-4">
                                    <label class="rating-button" for="rating-${{newIndex}}-${{modelNum}}-4">4</label>
                                    <input type="radio" name="rating-${{newIndex}}-${{modelNum}}" value="5" class="rating-radio" data-image="${{imageIds[originalIndex]}}" data-model="${{modelNum}}" id="rating-${{newIndex}}-${{modelNum}}-5">
                                    <label class="rating-button" for="rating-${{newIndex}}-${{modelNum}}-5">5</label>
                                </div>
                            </div>
                        </div>`;
                    }});

                    // Close the image container div
                    imageHtml += `</div>`;

                    // Add the complete HTML for this image to the page
                    container.innerHTML += imageHtml;
                }});
            }}

            // Fisher-Yates shuffle algorithm
            function shuffleArray(array) {{
                for (let i = array.length - 1; i > 0; i--) {{
                    const j = Math.floor(Math.random() * (i + 1));
                    [array[i], array[j]] = [array[j], array[i]];
                }}
                return array;
            }}

            function saveRatings() {{
                const labellerId = document.getElementById('rater-name').value.trim();

                if (!labellerId) {{
                    alert('Please enter a Labeller ID before saving');
                    return;
                }}

                const ratings = [];

                // Collect all ratings
                document.querySelectorAll('.rating-radio:checked').forEach(radio => {{
                    ratings.push({{
                        model: radio.dataset.model,
                        image_id: radio.dataset.image,
                        rating: radio.value
                    }});
                }});

                // Convert to CSV
                const headers = ['model', 'image_id', 'rating'];
                const csvContent = [
                    headers.join(','),
                    ...ratings.map(row => [
                        row.model,
                        row.image_id,
                        row.rating
                    ].join(','))
                ].join('\\n');

                // Create and trigger download with labeller ID in filename
                const blob = new Blob([csvContent], {{ type: 'text/csv;charset=utf-8;' }});
                const link = document.createElement('a');
                link.href = URL.createObjectURL(blob);
                link.download = `ratings_${{labellerId}}.csv`;
                link.click();
            }}

            // Add event listener to enable/disable save button based on labeller ID
            document.getElementById('rater-name').addEventListener('input', function() {{
                const saveButton = document.getElementById('save-button');
                saveButton.disabled = !this.value.trim();
            }});

            // Initially disable save button
            document.getElementById('save-button').disabled = true;

            // Render images when the page loads
            document.addEventListener('DOMContentLoaded', function() {{
                renderImages();
            }});
        </script>
    </body>
    </html>
    '''

    with open(output_file, 'w', encoding='utf-8') as f:
        f.write(html_content)

    print(f"Rating form with 4 models has been generated as {output_file}")
library/utils_model.py ADDED
@@ -0,0 +1,298 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import base64
2
+ from openai import OpenAI
3
+ from PIL import Image
4
+ from io import BytesIO
5
+ import os
6
+
7
def resize_image(image_input, max_size=1024, preserve_aspect_ratio=True):
    """
    Resize an image so its largest dimension does not exceed max_size.

    Args:
        image_input: Either a PIL Image object or a string file path to an image
        max_size (int): Maximum width or height (whichever is larger)
        preserve_aspect_ratio (bool): If True, scale both dimensions by the
            same factor; if False, force the image to max_size x max_size.

    Returns:
        PIL.Image: Resized image. The input image object is returned
        unchanged if it already fits within max_size.

    Raises:
        FileNotFoundError: If image_input is a path that does not exist.
    """
    # Accept either a file path or an already-loaded PIL image.
    if isinstance(image_input, str):
        if not os.path.exists(image_input):
            raise FileNotFoundError(f"Image file not found: {image_input}")
        img = Image.open(image_input)
    else:
        img = image_input

    width, height = img.size

    # Nothing to do if the image already fits.
    if width <= max_size and height <= max_size:
        return img

    if preserve_aspect_ratio:
        # Scale the longer edge down to max_size. Clamp the short edge to
        # >= 1 pixel: for extremely elongated images the scaled dimension
        # could round down to 0, which would make Image.resize raise.
        if width > height:
            new_width = max_size
            new_height = max(1, int(height * (max_size / width)))
        else:
            new_height = max_size
            new_width = max(1, int(width * (max_size / height)))
    else:
        new_width = max_size
        new_height = max_size

    # LANCZOS gives the best quality for downscaling.
    return img.resize((new_width, new_height), Image.LANCZOS)
50
+
51
def encode_image(image_input, format="JPEG", max_size=None):
    """
    Convert an image to a base64 encoded string, with optional resizing.

    Args:
        image_input: Either a PIL Image object or a string file path to an image
        format: Image format for saving (default: "JPEG")
        max_size: Maximum size for the image (width or height). If None, no
            resizing is done and file paths are encoded byte-for-byte.

    Returns:
        str: base64 encoded string of the image

    Raises:
        FileNotFoundError: If image_input is a path that does not exist.
    """
    if isinstance(image_input, str):
        if not os.path.exists(image_input):
            raise FileNotFoundError(f"Image file not found: {image_input}")
        if not max_size:
            # Fast path: encode the raw file bytes without re-compressing.
            with open(image_input, "rb") as image_file:
                return base64.b64encode(image_file.read()).decode("utf-8")
        img = Image.open(image_input)
    else:
        img = image_input

    if max_size:
        img = resize_image(img, max_size=max_size)

    # JPEG cannot store an alpha channel or a palette; convert such images
    # to RGB so save() does not raise "cannot write mode RGBA as JPEG"
    # (e.g. for a resized transparent PNG).
    if format.upper() in ("JPEG", "JPG") and img.mode not in ("RGB", "L"):
        img = img.convert("RGB")

    buffered = BytesIO()
    img.save(buffered, format=format)
    return base64.b64encode(buffered.getvalue()).decode("utf-8")
88
+
89
class OpenRouterAPI:
    """
    Thin wrapper around the OpenRouter chat-completions API, using the
    OpenAI SDK pointed at OpenRouter's OpenAI-compatible endpoint.
    """

    # Immutable default for classify_objs. Kept as a class-level tuple so
    # the default argument cannot be shared and mutated across calls
    # (mutable-default-argument pitfall).
    _DEFAULT_CATEGORIES = (
        "Painting/sketch",
        "Statue/Bust",
        "Clothing",
        "Porcelain/Ceramic tableware",
        "Text based Document",
        "Other",
    )

    def __init__(self, api_key=None, base_url="https://openrouter.ai/api/v1"):
        """
        Initialize the OpenRouter client.

        Args:
            api_key (str, optional): OpenRouter API key. If None, will try to
                get it from the OPENROUTER_API_KEY environment variable.
            base_url (str): Endpoint URL (OpenRouter's OpenAI-compatible API).

        Raises:
            ValueError: If no API key is supplied or found in the environment.
        """
        api_key = api_key or os.getenv("OPENROUTER_API_KEY")
        if not api_key:
            raise ValueError("OpenRouter API key not provided and not found in environment variables")

        self.client = OpenAI(
            api_key=api_key,
            base_url=base_url
        )

    def list_models(self):
        """
        List all available models on OpenRouter.

        Returns:
            list: List of model ID strings.
        """
        models = self.client.models.list()
        return [model.id for model in models.data]

    def generate_caption(self, image_path,
                         model="anthropic/claude-3-7-sonnet",
                         prompt_dev="",
                         prompt="Give a very brief description of this image.",
                         detail="high",
                         temperature=0.7,
                         max_image_size=1024):
        """
        Generate a caption for an image using OpenRouter models.

        Args:
            image_path: Path to the image file (or a PIL image; it is passed
                straight to encode_image, which accepts both).
            model (str): Model to use (e.g., 'anthropic/claude-3-7-sonnet', 'openai/gpt-4o')
            prompt_dev (str): System prompt or developer prompt
            prompt (str): Text prompt to guide caption generation
            detail (str): Level of detail for image analysis ('low', 'high') -
                only attached for OpenAI models
            temperature (float): Sampling temperature for generation
            max_image_size (int): Maximum dimension of the image before
                encoding. Set to None to disable resizing.

        Returns:
            str: Generated caption
        """
        # Base64-encode (and optionally downscale) the image.
        base64_image = encode_image(image_path, max_size=max_image_size)

        messages = []

        # Optional system/developer message.
        if prompt_dev:
            messages.append({
                "role": "system",
                "content": prompt_dev
            })

        # User message: text part first, then the image part.
        content = [
            {
                "type": "text",
                "text": prompt,
            }
        ]

        # The 'detail' field is an OpenAI-specific extension, so it is only
        # attached when routing to an OpenAI model.
        image_url = {"url": f"data:image/jpeg;base64,{base64_image}"}
        if "openai" in model.lower():
            image_url["detail"] = detail
        content.append({
            "type": "image_url",
            "image_url": image_url,
        })

        messages.append({
            "role": "user",
            "content": content,
        })

        response = self.client.chat.completions.create(
            model=model,
            messages=messages,
            temperature=temperature,
        )

        return response.choices[0].message.content

    def generate_text_response(self, text_prompt,
                               model="anthropic/claude-3-5-haiku",
                               prompt_dev="",
                               temperature=0.7):
        """
        Generate a response for a text-only prompt using OpenRouter models.

        Args:
            text_prompt (str): The text to analyze or respond to
            model (str): Model to use (e.g., 'anthropic/claude-3-5-haiku', 'openai/gpt-4o-mini', 'google/gemini-pro')
            prompt_dev (str): System prompt or developer prompt
            temperature (float): Sampling temperature for generation

        Returns:
            str: Generated response
        """
        messages = []

        # Optional system/developer message.
        if prompt_dev:
            messages.append({
                "role": "system",
                "content": prompt_dev
            })

        messages.append({
            "role": "user",
            "content": text_prompt
        })

        response = self.client.chat.completions.create(
            model=model,
            messages=messages,
            temperature=temperature,
        )

        return response.choices[0].message.content

    def classify_objs(self, image_path,
                      categories=None,
                      model="openai/gpt-4o-mini",
                      detail="low",
                      max_image_size=512):
        """
        Classify the object in an image into predefined categories.

        Args:
            image_path (str): Path to the image file
            categories (list, optional): Categories for classification.
                Defaults to _DEFAULT_CATEGORIES; a None sentinel is used
                instead of a mutable list default.
            model (str): Model to use for classification
            detail (str): Level of detail for image analysis ('low', 'high') -
                only applies to OpenAI models
            max_image_size (int): Maximum dimension for the image. Smaller
                than for captioning, since classification needs less detail.

        Returns:
            str: Model response (expected to be one of the category names).
        """
        if categories is None:
            # Convert to list so the prompt renders exactly as before
            # (list repr, not tuple repr).
            categories = list(self._DEFAULT_CATEGORIES)
        prompt = f"This is an image of a museum object. Classify it into one of these categories: {categories}. Only classify it if you are confident it belongs in that category and the category represents the main portion of the image, otherwise return 'Other'. Respond with only the category name."
        return self.generate_caption(image_path, model=model, prompt=prompt, detail=detail, max_image_size=max_image_size)

    def estimate_cost(self, model, tokens_in=1000, tokens_out=200, image=False, detail="low"):
        """
        Estimate the cost of using a specific model based on input/output tokens.

        This is a simplified approach using a hard-coded pricing table; the
        values are examples and should be kept in sync with actual
        OpenRouter pricing.

        Args:
            model (str): Model identifier
            tokens_in (int): Number of input tokens
            tokens_out (int): Number of output tokens
            image (bool): Whether the request includes an image
            detail (str): Image detail level ('low', 'high')

        Returns:
            dict: Cost estimate with keys 'model', 'input_tokens',
            'output_tokens', 'input_cost', 'output_cost', 'total_cost'
            (costs in USD).
        """
        # Simplified pricing mapping (USD per 1M tokens).
        pricing = {
            "anthropic/claude-3-7-sonnet": {"input": 15.0, "output": 75.0},
            "anthropic/claude-3-5-haiku": {"input": 1.0, "output": 5.0},
            "openai/gpt-4o": {"input": 10.0, "output": 30.0},
            "openai/gpt-4o-mini": {"input": 0.2, "output": 0.6},
            "google/gemini-pro": {"input": 0.5, "output": 1.5},
        }

        # Fall back to a moderate price if the model is not in the table.
        model_pricing = pricing.get(model, {"input": 5.0, "output": 15.0})

        # Rough token surcharge for an attached image, by detail level.
        image_tokens = 0
        if image:
            if detail == "low":
                image_tokens = 1200
            else:  # high
                image_tokens = 4000

        input_cost = (tokens_in + image_tokens) * model_pricing["input"] / 1000000
        output_cost = tokens_out * model_pricing["output"] / 1000000

        return {
            "model": model,
            "input_tokens": tokens_in + image_tokens,
            "output_tokens": tokens_out,
            "input_cost": input_cost,
            "output_cost": output_cost,
            "total_cost": input_cost + output_cost,
        }
297
+ }
298
+
library/utils_prompt.py ADDED
@@ -0,0 +1,76 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
def prompt_new(title=None):
    """
    Build a minimal one-sentence prompt describing a museum object.

    Args:
        title (str, optional): Title of the object; included in parentheses
            when given.

    Returns:
        str: The prompt sentence.
    """
    # Bug fix: the original assigned an empty dict when no title was given,
    # which rendered a literal "{}" inside the sentence.
    if title is None:
        return "This object is part of a museum collection."
    return f"This object (titled: {title}) is part of a museum collection."
7
+
8
def prompt_1(title=None):
    """
    Build an alt-text generation prompt for a museum image.

    Args:
        title (str, optional): Title of the image; included in parentheses
            when given.

    Returns:
        str: The full prompt.
    """
    # Bug fix: the original assigned an empty dict when no title was given,
    # which rendered a literal "{}" inside the sentence.
    if title is None:
        intro = "The image is part of a museum collection."
    else:
        intro = f"The image (titled: {title}) is part of a museum collection."
    return (
        intro
        + " Please give a short description of the museum object that will be used as alt-text."
        + " Do not discuss historical context other than what is included in the image."
        + " Don't mention the background, simply focus on describing the object in the image"
        + " itself and try to avoid artistic interpretations and evaluative descriptions."
    )
14
+
15
def context_prompt():
    """Return the instruction block that asks the model to append an
    'Additional context:' paragraph after the main alt-text, with the
    character budgets for each part."""
    return """
    Additionally add a paragraph below starting with 'Additional context:' where you mention the broader context of the image. Mention additional information that will be useful when viewing this image considering race, sexuality, gender and class where relevant. This is a museum artifact and the description will be used for educational purposes.
    So it should have this format:
    main text

    Additional context: additional text

    The main text should be a maximum of 300 characters and the additional context should be a maximum of 120.
    """
27
+
28
+
29
def generate_prompt(title, category='Other', max_characters=450):
    """
    Build a category-specific alt-text prompt for a museum object image.

    Args:
        title (str): Title of the object, interpolated into the prompt.
        category (str): One of the classifier categories (e.g. 'Clothing',
            'Statue/Bust', 'Text based Document'); anything unrecognised
            falls back to a generic museum-object prompt.
        max_characters (int): Character budget stated in the prompt preamble.

    Returns:
        str: The assembled prompt string.
    """
    pre_prompt = f"Keep language simple, plain and informative and limit responses to a maximum of {max_characters} characters. "

    def main_prompt(image='image', title=''):
        # Shared opening sentence; `image` names the object type in context.
        return f"This {image} (titled: {title}) is part of a museum collection. Please give a concise description of the {image} that will be used as alt-text for accessibility purposes. "

    extra_prompt = "Do not explicitly state the title in the description. Do not discuss historical context other than what is included in the image. Avoid concluding statements. "
    extra_prompt2 = "Avoid artistic interpretations and evaluative descriptions. "
    background_prompt = "Do not mention the background or setting, simply focus on describing the item itself. "

    if category == 'Clothing':
        prompt = main_prompt(image='garment', title=title)
        prompt += "Provide a concise, factual description of the garment, including its type, material, color, shape, notable design features, and any visible embellishments. "
        prompt += extra_prompt
        prompt += extra_prompt2

    elif category == 'Statue/Bust':
        prompt = main_prompt(image='sculpture', title=title)
        prompt += extra_prompt
        prompt += extra_prompt2
        prompt += background_prompt

    elif category == 'Painting/sketch':
        prompt = main_prompt(image='artwork', title=title)
        prompt += extra_prompt
        prompt += "Focus on providing a description of the artwork including its content and also briefly its style. "
        prompt += extra_prompt2

    elif category == 'Porcelain/Ceramic tableware':
        prompt = main_prompt(image='tableware', title=title)
        prompt += "Describe its type (e.g., plate, bowl, teacup) and notable elements of its appearance. "
        prompt += extra_prompt
        prompt += extra_prompt2
        prompt += background_prompt

    elif category in ('Text based Document', 'Text based document'):
        # Accept both capitalisations: the classifier emits
        # 'Text based Document', but this branch previously only matched
        # the lowercase form and so was never taken for classifier output.
        prompt = main_prompt(image='image', title=title)
        # Bug fix: this line used to *assign* instead of append, silently
        # discarding the main_prompt opening sentence.
        prompt += "If the text is long do not include the whole text but summarise it. "
        prompt += extra_prompt
        prompt += extra_prompt2
    else:
        prompt = f"This image is titled: {title} and is part of a museum collection. Please give a concise description of the museum object that will be used as alt-text. Do not discuss historical context other than what is included in the image. Don't mention the background, simply focus on describing the object in the image itself and try to avoid artistic interpretations and evaluative descriptions."

    return pre_prompt + prompt
requirements.txt CHANGED
@@ -1,2 +1,6 @@
1
  gradio==5.24.0
2
- pillow
 
 
 
 
 
1
  gradio==5.24.0
2
+ numpy>=1.24.0
3
+ Pillow>=10.0.0
4
+ requests>=2.28.0
5
+ python-dotenv>=1.0.0
6
+ openai>=1.0.0