Markus Pobitzer committed on
Commit
b6d1c13
·
1 Parent(s): 7896daf
Files changed (48) hide show
  1. .gitattributes +2 -0
  2. README.md +7 -7
  3. app.py +14 -0
  4. requirements.txt +3 -0
  5. src/gecora/__init__.py +1 -0
  6. src/gecora/__pycache__/__init__.cpython-312.pyc +0 -0
  7. src/gecora/app/__init__.py +1 -0
  8. src/gecora/app/__pycache__/__init__.cpython-312.pyc +0 -0
  9. src/gecora/app/__pycache__/i_to_v_app.cpython-312.pyc +0 -0
  10. src/gecora/app/i_to_v_app.py +249 -0
  11. src/gecora/cli/__init__.py +1 -0
  12. src/gecora/cli/loomis_painter.py +22 -0
  13. src/gecora/dataset/__init__.py +1 -0
  14. src/gecora/dataset/__pycache__/__init__.cpython-312.pyc +0 -0
  15. src/gecora/dataset/__pycache__/base_manager.cpython-312.pyc +0 -0
  16. src/gecora/dataset/__pycache__/video_manager.cpython-312.pyc +0 -0
  17. src/gecora/dataset/__pycache__/video_pkl_manager.cpython-312.pyc +0 -0
  18. src/gecora/dataset/__pycache__/vieo_pkl_manager.cpython-312.pyc +0 -0
  19. src/gecora/dataset/base_manager.py +82 -0
  20. src/gecora/dataset/create_test_dataset.py +48 -0
  21. src/gecora/dataset/sub_dir_manager.py +117 -0
  22. src/gecora/dataset/video_manager.py +156 -0
  23. src/gecora/dataset/video_pkl_manager.py +207 -0
  24. src/gecora/dataset_converting/__init__.py +1 -0
  25. src/gecora/dataset_converting/video_pkl_to_video.py +67 -0
  26. src/gecora/db/__init__.py +1 -0
  27. src/gecora/db/__pycache__/__init__.cpython-312.pyc +0 -0
  28. src/gecora/db/__pycache__/hf_jsonl.cpython-312.pyc +0 -0
  29. src/gecora/db/__pycache__/sqlite.cpython-312.pyc +0 -0
  30. src/gecora/db/hf_jsonl.py +385 -0
  31. src/gecora/db/sqlite.py +279 -0
  32. src/gecora/logging/__init__.py +1 -0
  33. src/gecora/logging/__pycache__/__init__.cpython-312.pyc +0 -0
  34. src/gecora/logging/__pycache__/logger.cpython-312.pyc +0 -0
  35. src/gecora/logging/logger.py +16 -0
  36. src/gecora/logic/__init__.py +1 -0
  37. src/gecora/logic/__pycache__/__init__.cpython-312.pyc +0 -0
  38. src/gecora/logic/__pycache__/base.cpython-312.pyc +0 -0
  39. src/gecora/logic/__pycache__/loomis_painter.cpython-312.pyc +0 -0
  40. src/gecora/logic/__pycache__/utils.cpython-312.pyc +0 -0
  41. src/gecora/logic/base.py +53 -0
  42. src/gecora/logic/loomis_painter.py +205 -0
  43. src/gecora/logic/utils.py +37 -0
  44. src/gecora/py.typed +0 -0
  45. src/gecora/ranking/__init__.py +1 -0
  46. src/gecora/ranking/__pycache__/__init__.cpython-312.pyc +0 -0
  47. src/gecora/ranking/__pycache__/ranking_system.cpython-312.pyc +0 -0
  48. src/gecora/ranking/ranking_system.py +178 -0
.gitattributes CHANGED
@@ -33,3 +33,5 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
33
  *.zip filter=lfs diff=lfs merge=lfs -text
34
  *.zst filter=lfs diff=lfs merge=lfs -text
35
  *tfevents* filter=lfs diff=lfs merge=lfs -text
 
 
 
33
  *.zip filter=lfs diff=lfs merge=lfs -text
34
  *.zst filter=lfs diff=lfs merge=lfs -text
35
  *tfevents* filter=lfs diff=lfs merge=lfs -text
36
+ *.mp4 filter=lfs diff=lfs merge=lfs -text
37
+ *.png filter=lfs diff=lfs merge=lfs -text
README.md CHANGED
@@ -1,13 +1,13 @@
1
  ---
2
- title: Wlp User Study
3
- emoji: 🐢
4
- colorFrom: indigo
5
- colorTo: pink
6
  sdk: gradio
7
- sdk_version: 6.6.0
8
  app_file: app.py
9
  pinned: false
10
- short_description: User study for step-by-step painting videos
11
  ---
12
 
13
- Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference
 
1
  ---
2
+ title: Test User Study
3
+ emoji: 🏢
4
+ colorFrom: red
5
+ colorTo: blue
6
  sdk: gradio
7
+ sdk_version: 6.5.1
8
  app_file: app.py
9
  pinned: false
10
+ short_description: Test for a user study
11
  ---
12
 
13
+ Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference
app.py ADDED
@@ -0,0 +1,14 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import os
2
+ import sys
3
+
4
+
5
+ # 1. Add the 'src' folder to the Python path so we can import 'gecora'
6
+ sys.path.append(os.path.join(os.path.dirname(__file__), "src"))
7
+
8
+ from gecora.logic.loomis_painter import LoomisPainterApp
9
+
10
+
11
+ app = LoomisPainterApp(
12
+ root_path="./", dataset_path="data/", hf_repo_id="Markus-Pobitzer/gecora-wlp", desired_num_selections=40
13
+ )
14
+ app.launch()
requirements.txt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ imageio>=2.37.0
2
+ pillow
3
+ huggingface_hub
src/gecora/__init__.py ADDED
@@ -0,0 +1 @@
 
 
1
+ """Init of project."""
src/gecora/__pycache__/__init__.cpython-312.pyc ADDED
Binary file (173 Bytes). View file
 
src/gecora/app/__init__.py ADDED
@@ -0,0 +1 @@
 
 
1
+ """Init."""
src/gecora/app/__pycache__/__init__.cpython-312.pyc ADDED
Binary file (166 Bytes). View file
 
src/gecora/app/__pycache__/i_to_v_app.cpython-312.pyc ADDED
Binary file (10.2 kB). View file
 
src/gecora/app/i_to_v_app.py ADDED
@@ -0,0 +1,249 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from typing import Any, Optional, Tuple
2
+
3
+ import gradio as gr
4
+
5
+ from gecora.logic.base import LogicBase
6
+ from gecora.logic.utils import cleanup_list, create_temp_file
7
+
8
+
9
+ class ItoVApp:
10
+ def __init__(
11
+ self,
12
+ logic_class: LogicBase,
13
+ task_desc: str = "Select the best video for the reference image.",
14
+ ref_img_label: str = "Reference Image",
15
+ left_media_label: str = "Left Video",
16
+ right_media_label: str = "Right Video",
17
+ desired_num_selections: Optional[int] = None,
18
+ ):
19
+ self.logic_class = logic_class
20
+ self.desired_num_selections = desired_num_selections
21
+ # User specific input
22
+ self.user_input = gr.Accordion(label="1. Enter your name")
23
+ self.username_input = gr.Textbox(label=None, show_label=False)
24
+ self.select_button = gr.Button("Select Name")
25
+ # Preference task
26
+ self.task = gr.Accordion(label="2. Task", visible=False)
27
+ self.task_description: str = task_desc
28
+ self.reference_image = gr.Image(label=ref_img_label, height=512)
29
+ self.left_media = gr.Video(label=left_media_label, height=512, autoplay=True)
30
+ self.right_media = gr.Video(label=right_media_label, height=512, autoplay=True)
31
+ self.left_button = gr.Button("←")
32
+ self.tie_button = gr.Button("Tie")
33
+ self.right_button = gr.Button("→")
34
+ self.tmp_video_path_left = create_temp_file()
35
+ self.tmp_video_path_right = create_temp_file()
36
+
37
+ def set_username(self, username):
38
+ user_id = self.logic_class.set_username(username=username)
39
+ if username and user_id is not None:
40
+ next_comp = self.logic_class.get_next_comparison(user_id=user_id)
41
+ if next_comp is None:
42
+ gr.Error("Error: Loading the content failed.")
43
+ return 0, gr.update(visible=False), "", "", "", "", None, None, None
44
+
45
+ (
46
+ (ret_reference_id, ret_model_left_id, ret_model_right_id),
47
+ (reference_image, left_video, right_video),
48
+ (num_preferences, total_num_comparison),
49
+ ) = next_comp
50
+
51
+ progress_str = str(num_preferences) + " preferences selected!"
52
+ if total_num_comparison > 0:
53
+ perc_num = (
54
+ self.desired_num_selections if self.desired_num_selections is not None else total_num_comparison
55
+ )
56
+ progress_str = (
57
+ str(int(num_preferences / perc_num * 100))
58
+ + f"% ({num_preferences} / {total_num_comparison} total) preferences selected!"
59
+ )
60
+ return (
61
+ user_id,
62
+ gr.update(visible=True),
63
+ ret_reference_id,
64
+ ret_model_left_id,
65
+ ret_model_right_id,
66
+ progress_str,
67
+ reference_image,
68
+ left_video,
69
+ right_video,
70
+ )
71
+ else:
72
+ gr.Error(
73
+ f"Failed to set Username {username}. Make sure to put in a value in the textfield, otherwise try to reload webpage."
74
+ )
75
+ return 0, gr.update(visible=False), "", "", "", "", None, None, None
76
+
77
+ def set_preference(
78
+ self, user_id, reference_id: str, model_left_id: str, model_right_id: str, preferred_side: str
79
+ ) -> Tuple[str, str, str, str, Any, Any, Any]:
80
+ """Returns reference_id: str, model_left_id: str, model_right_id: str, reference_image: PIL.Image.Image, left_video:str, right_video:str."""
81
+ user_id = int(user_id) # type: ignore
82
+ succ, msg = self.logic_class.set_preference(
83
+ user_id=user_id,
84
+ reference_id=reference_id,
85
+ model_left_id=model_left_id,
86
+ model_right_id=model_right_id,
87
+ preferred_side=preferred_side,
88
+ )
89
+ if not succ:
90
+ gr.Info(f"Something went wrong: {msg}")
91
+ next_comp = self.logic_class.get_next_comparison(user_id=user_id)
92
+ if next_comp is None:
93
+ gr.Error("We are sorry, something went wrong! Please try to reload the page.")
94
+ return "", "", "", "", None, None, None # type: ignore
95
+
96
+ if preferred_side == "left":
97
+ gr.Success("You chose the left side!\n💪🙂")
98
+ elif preferred_side == "right":
99
+ gr.Success("You chose the right side!\n🙂💪")
100
+ elif preferred_side == "tie":
101
+ gr.Success("It's a tie!\n🤝")
102
+
103
+ (
104
+ (ret_reference_id, ret_model_left_id, ret_model_right_id),
105
+ (reference_image, left_video, right_video),
106
+ (num_preferences, total_num_comparison),
107
+ ) = next_comp
108
+ progress_str = str(num_preferences) + " preferences selected!"
109
+ if total_num_comparison > 0:
110
+ perc_num = self.desired_num_selections if self.desired_num_selections is not None else total_num_comparison
111
+ progress_str = (
112
+ str(int(num_preferences / perc_num * 100))
113
+ + f"% ({num_preferences} / {total_num_comparison} total) preferences selected!"
114
+ )
115
+
116
+ return (
117
+ ret_reference_id,
118
+ ret_model_left_id,
119
+ ret_model_right_id,
120
+ progress_str,
121
+ reference_image,
122
+ left_video,
123
+ right_video,
124
+ )
125
+
126
+ def choose_left(self, user_id: str, reference_id: str, model_left_id: str, model_right_id: str):
127
+ return self.set_preference(
128
+ user_id=user_id,
129
+ reference_id=reference_id,
130
+ model_left_id=model_left_id,
131
+ model_right_id=model_right_id,
132
+ preferred_side="left",
133
+ )
134
+
135
+ def choose_tie(self, user_id: str, reference_id: str, model_left_id: str, model_right_id: str):
136
+ return self.set_preference(
137
+ user_id=user_id,
138
+ reference_id=reference_id,
139
+ model_left_id=model_left_id,
140
+ model_right_id=model_right_id,
141
+ preferred_side="tie",
142
+ )
143
+
144
+ def choose_right(self, user_id: str, reference_id: str, model_left_id: str, model_right_id: str):
145
+ return self.set_preference(
146
+ user_id=user_id,
147
+ reference_id=reference_id,
148
+ model_left_id=model_left_id,
149
+ model_right_id=model_right_id,
150
+ preferred_side="right",
151
+ )
152
+
153
+ def build_interface(self):
154
+ with gr.Blocks(theme=gr.themes.Monochrome()) as demo:
155
+ local_storage = gr.BrowserState(["", ""])
156
+ with self.user_input.render():
157
+ with gr.Row():
158
+ user_id = gr.Textbox(show_label=False, visible=False)
159
+ self.username_input.render()
160
+ self.select_button.render()
161
+
162
+ with self.task.render():
163
+ gr.Markdown(self.task_description)
164
+ reference_id = gr.Textbox(show_label=False, visible=False)
165
+ model_left_id = gr.Textbox(show_label=False, visible=False)
166
+ model_right_id = gr.Textbox(show_label=False, visible=False)
167
+ progress = gr.Textbox(show_label=False, text_align="right")
168
+
169
+ with gr.Row():
170
+ self.left_media.render()
171
+ self.reference_image.render()
172
+ self.right_media.render()
173
+
174
+ with gr.Row():
175
+ self.left_button.render()
176
+ self.tie_button.render()
177
+ self.right_button.render()
178
+
179
+ @demo.load(inputs=[local_storage], outputs=[user_id, self.username_input])
180
+ def load_from_local_storage(saved_values):
181
+ return saved_values[0], saved_values[1]
182
+
183
+ @gr.on([user_id.change], inputs=[user_id, self.username_input], outputs=[local_storage])
184
+ def save_to_local_storage(user_id, username):
185
+ return [user_id, username]
186
+
187
+ self.select_button.click(
188
+ self.set_username,
189
+ inputs=[self.username_input],
190
+ outputs=[
191
+ user_id,
192
+ self.task,
193
+ reference_id,
194
+ model_left_id,
195
+ model_right_id,
196
+ progress,
197
+ self.reference_image,
198
+ self.left_media,
199
+ self.right_media,
200
+ ],
201
+ )
202
+
203
+ self.left_button.click(
204
+ self.choose_left,
205
+ inputs=[user_id, reference_id, model_left_id, model_right_id],
206
+ outputs=[
207
+ reference_id,
208
+ model_left_id,
209
+ model_right_id,
210
+ progress,
211
+ self.reference_image,
212
+ self.left_media,
213
+ self.right_media,
214
+ ],
215
+ )
216
+ self.tie_button.click(
217
+ self.choose_tie,
218
+ inputs=[user_id, reference_id, model_left_id, model_right_id],
219
+ outputs=[
220
+ reference_id,
221
+ model_left_id,
222
+ model_right_id,
223
+ progress,
224
+ self.reference_image,
225
+ self.left_media,
226
+ self.right_media,
227
+ ],
228
+ )
229
+ self.right_button.click(
230
+ self.choose_right,
231
+ inputs=[user_id, reference_id, model_left_id, model_right_id],
232
+ outputs=[
233
+ reference_id,
234
+ model_left_id,
235
+ model_right_id,
236
+ progress,
237
+ self.reference_image,
238
+ self.left_media,
239
+ self.right_media,
240
+ ],
241
+ )
242
+
243
+ demo.unload(fn=lambda: cleanup_list([self.tmp_video_path_left, self.tmp_video_path_right]))
244
+
245
+ return demo
246
+
247
+ def launch(self):
248
+ app = self.build_interface()
249
+ app.launch(server_name="0.0.0.0")
src/gecora/cli/__init__.py ADDED
@@ -0,0 +1 @@
 
 
1
+ """Init."""
src/gecora/cli/loomis_painter.py ADDED
@@ -0,0 +1,22 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import argparse
2
+
3
+ from gecora.logic.loomis_painter import LoomisPainterApp
4
+
5
+
6
+ def main():
7
+ parser = argparse.ArgumentParser(
8
+ description="Loomis Painter Ranking CLI - Evaluate painting processes and update model rankings."
9
+ )
10
+
11
+ parser.add_argument("root_path", type=str, help="Root directory containing Hugging Face datasets.")
12
+ parser.add_argument("--dataset_path", type=str, help="Optional path to the dataset.")
13
+
14
+ args = parser.parse_args()
15
+
16
+ app = LoomisPainterApp(args.root_path, args.dataset_path)
17
+ print("App initialized.")
18
+ app.launch()
19
+
20
+
21
+ if __name__ == "__main__":
22
+ main()
src/gecora/dataset/__init__.py ADDED
@@ -0,0 +1 @@
 
 
1
+ """Init."""
src/gecora/dataset/__pycache__/__init__.cpython-312.pyc ADDED
Binary file (170 Bytes). View file
 
src/gecora/dataset/__pycache__/base_manager.cpython-312.pyc ADDED
Binary file (4.69 kB). View file
 
src/gecora/dataset/__pycache__/video_manager.cpython-312.pyc ADDED
Binary file (8.44 kB). View file
 
src/gecora/dataset/__pycache__/video_pkl_manager.cpython-312.pyc ADDED
Binary file (11.4 kB). View file
 
src/gecora/dataset/__pycache__/vieo_pkl_manager.cpython-312.pyc ADDED
Binary file (11.4 kB). View file
 
src/gecora/dataset/base_manager.py ADDED
@@ -0,0 +1,82 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import json
2
+ import logging
3
+ import os
4
+ from dataclasses import asdict, dataclass
5
+ from typing import Any, List, Optional, Tuple
6
+
7
+ from gecora.logging.logger import setup_file_logger
8
+
9
+
10
+ @dataclass
11
+ class DatasetManagerConfig:
12
+ root_path: str
13
+ gt_dataset: str = "gt"
14
+ dataset_split: str = "test"
15
+ entry_id: str = "entry_id"
16
+ reference_column_name: str = "reference"
17
+ genereated_column_name: str = "generated"
18
+ logging_path: Optional[str] = None
19
+
20
+ def to_json(self, path: str):
21
+ with open(path, "w") as f:
22
+ json.dump(asdict(self), f)
23
+
24
+ @classmethod
25
+ def from_json(cls, path: str):
26
+ with open(path, "r") as f:
27
+ data = json.load(f)
28
+ return cls(**data)
29
+
30
+
31
+ class BaseDatasetManager:
32
+ """
33
+ A class to manage Hugging Face datasets stored in subdirectories of a given root path.
34
+
35
+ Attributes:
36
+ root_path (str): The root directory containing subfolders with Hugging Face datasets.
37
+ dataset_split (str): If the datasets are a Dict, select specified split.
38
+ common_entry_ids (List[str]): List of 'entry_id's present in all datasets.
39
+ partial_entry_ids (List[str]): List of 'entry_id's present in only some datasets.
40
+ """
41
+
42
+ def __init__(self, config: DatasetManagerConfig):
43
+ self.config = config
44
+ self.common_entry_ids: List[str] = []
45
+ self.partial_entry_ids: List[str] = []
46
+
47
+ logger_name = f"{self.__class__.__name__}_logger"
48
+ log_file = os.path.join(self.config.logging_path, f"{logger_name}.txt") if self.config.logging_path else None
49
+ self.logger = setup_file_logger(logger_name, log_file) if log_file else logging.getLogger(logger_name)
50
+
51
+ self.logger = logging.getLogger()
52
+
53
+ def get_dataset_names(self) -> List[str]:
54
+ """
55
+ Returns a list of all loaded dataset names.
56
+
57
+ Returns:
58
+ List[str]: Names of the datasets.
59
+ """
60
+ raise ValueError("Must be implemented by child class.")
61
+
62
+ def get_entries_by_id(
63
+ self, entry_id: str, dataset_name1: str, dataset_name2: str
64
+ ) -> Tuple[Optional[Any], Optional[Any], Optional[Any]]:
65
+ """
66
+ Retrieves entries from two datasets by a specific 'entry_id'.
67
+
68
+ Args:
69
+ entry_id (str): The 'entry_id' to search for.
70
+ dataset_name1 (str): Name of the first dataset.
71
+ dataset_name2 (str): Name of the second dataset.
72
+
73
+ Returns:
74
+ Tuple[Optional[Any], Optional[Any], Optional[Any]]:
75
+ Reference content, or None if not found.
76
+ Generated entry from dataset_name1 matching the 'entry_id', or None if not found.
77
+ Generated entry from dataset_name2 matching the 'entry_id', or None if not found.
78
+
79
+ Raises:
80
+ ValueError: If one or both dataset names are not found.
81
+ """
82
+ raise ValueError("Must be implemented by child class.")
src/gecora/dataset/create_test_dataset.py ADDED
@@ -0,0 +1,48 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import os
2
+
3
+ import imageio
4
+ import numpy as np
5
+ import pandas as pd
6
+ from datasets import Dataset
7
+ from PIL import Image
8
+
9
+
10
+ def generate_random_image(size=(512, 512)):
11
+ array = np.random.randint(0, 256, size + (3,), dtype=np.uint8)
12
+ return Image.fromarray(array)
13
+
14
+
15
+ def generate_random_video(num_frames=10, size=(512, 512)):
16
+ frames = [np.random.randint(0, 256, size + (3,), dtype=np.uint8) for _ in range(num_frames)]
17
+ return frames
18
+
19
+
20
+ def save_video(frames, path):
21
+ imageio.mimsave(path, frames, fps=5)
22
+
23
+
24
+ def create_i_to_v_huggingface_datasets(output_dir, num_datasets=3, num_entries=5):
25
+ os.makedirs(output_dir, exist_ok=True)
26
+ entry_ids = [f"id_{i}" for i in range(num_entries)]
27
+
28
+ for dataset_index in range(num_datasets):
29
+ data = {"entry_id": [], "reference_image": [], "video": []}
30
+ dataset_path = os.path.join(output_dir, f"dataset_{dataset_index}")
31
+ os.makedirs(dataset_path, exist_ok=True)
32
+
33
+ for entry_id in entry_ids:
34
+ img = generate_random_image()
35
+ img_path = os.path.join(dataset_path, f"{entry_id}_image.png")
36
+ img.save(img_path)
37
+
38
+ video_frames = generate_random_video()
39
+ video_path = os.path.join(dataset_path, f"{entry_id}_video.mp4")
40
+ save_video(video_frames, video_path)
41
+
42
+ data["entry_id"].append(entry_id)
43
+ data["reference_image"].append(img_path)
44
+ data["video"].append(video_path)
45
+
46
+ df = pd.DataFrame(data)
47
+ hf_dataset = Dataset.from_pandas(df)
48
+ hf_dataset.save_to_disk(dataset_path)
src/gecora/dataset/sub_dir_manager.py ADDED
@@ -0,0 +1,117 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import os
2
+ from typing import Any, Dict, List, Optional, Tuple, Union
3
+
4
+ from datasets import Dataset, DatasetDict, load_from_disk
5
+
6
+ from gecora.dataset.base_manager import BaseDatasetManager, DatasetManagerConfig
7
+
8
+
9
+ class SubDirDatasetManager(BaseDatasetManager):
10
+ """
11
+ A class to manage Hugging Face datasets stored in subdirectories of a given root path.
12
+
13
+ Attributes:
14
+ root_path (str): The root directory containing subfolders with Hugging Face datasets.
15
+ dataset_split (str): If the datasets are a Dict, select specified split.
16
+ datasets (Dict[str, Union[Dataset, DatasetDict]]): Dictionary mapping dataset names to loaded datasets.
17
+ common_entry_ids (List[str]): List of 'entry_id's present in all datasets.
18
+ partial_entry_ids (List[str]): List of 'entry_id's present in only some datasets.
19
+ """
20
+
21
+ def __init__(self, config: DatasetManagerConfig) -> None:
22
+ """
23
+ Initializes the dataset manager and loads datasets from subdirectories.
24
+
25
+ Args:
26
+ config (DatasetManagerConfig): Configuration.
27
+ """
28
+ super().__init__(config=config)
29
+ self.datasets: Dict[str, Union[Dataset, DatasetDict]] = {}
30
+ self._load_datasets()
31
+ self._analyze_entry_ids()
32
+ self.logger.info(f"Loaded following datasets: {self.get_dataset_names()}")
33
+ self.logger.info(f"Found {len(self.common_entry_ids)} entries in all datasets.")
34
+ self.logger.info(f"Found {len(self.partial_entry_ids)} entries only in some datasets.")
35
+
36
+ def _load_datasets(self) -> None:
37
+ """
38
+ Loads all Hugging Face datasets from subdirectories in the root path.
39
+ """
40
+ for subdir in os.listdir(self.config.root_path):
41
+ full_path = os.path.join(self.config.root_path, subdir)
42
+ if os.path.isdir(full_path):
43
+ try:
44
+ dataset = load_from_disk(full_path)
45
+ self.datasets[subdir] = dataset
46
+ except Exception as e:
47
+ self.logger.info(f"Skipping {subdir}: {e}")
48
+
49
+ def get_dataset_names(self) -> List[str]:
50
+ """
51
+ Returns a list of all loaded dataset names.
52
+
53
+ Returns:
54
+ List[str]: Names of the datasets.
55
+ """
56
+ return list(self.datasets.keys())
57
+
58
+ def _analyze_entry_ids(self) -> None:
59
+ """
60
+ Analyzes all datasets to find common and partial 'entry_id's.
61
+ Updates `common_entry_ids` and `partial_entry_ids` attributes.
62
+ """
63
+ entry_id_sets: List[set] = []
64
+
65
+ for dataset in self.datasets.values():
66
+ if isinstance(dataset, DatasetDict):
67
+ dataset = dataset[self.config.dataset_split]
68
+ entry_ids = set(dataset[self.config.entry_id])
69
+ entry_id_sets.append(entry_ids)
70
+
71
+ if entry_id_sets:
72
+ self.common_entry_ids = list(set.intersection(*entry_id_sets))
73
+ all_entry_ids = set.union(*entry_id_sets)
74
+ self.partial_entry_ids = list(all_entry_ids - set(self.common_entry_ids))
75
+
76
+ def get_entries_by_id(
77
+ self, entry_id: str, dataset_name1: str, dataset_name2: str
78
+ ) -> Tuple[Optional[Any], Optional[Any], Optional[Any]]:
79
+ """
80
+ Retrieves entries from two datasets by a specific 'entry_id'.
81
+
82
+ Args:
83
+ entry_id (str): The 'entry_id' to search for.
84
+ dataset_name1 (str): Name of the first dataset.
85
+ dataset_name2 (str): Name of the second dataset.
86
+
87
+ Returns:
88
+ Tuple[Optional[Any], Optional[Any], Optional[Any]]:
89
+ Reference content, or None if not found.
90
+ Generated entry from dataset_name1 matching the 'entry_id', or None if not found.
91
+ Generated entry from dataset_name2 matching the 'entry_id', or None if not found.
92
+
93
+ Raises:
94
+ ValueError: If one or both dataset names are not found.
95
+ """
96
+ if dataset_name1 not in self.datasets or dataset_name2 not in self.datasets:
97
+ raise ValueError("One or both dataset names not found.")
98
+
99
+ def find_entry(dataset: Union[Dataset, DatasetDict], entry_id: str) -> Optional[Dict]:
100
+ if isinstance(dataset, DatasetDict):
101
+ dataset = dataset[self.config.dataset_split]
102
+ for entry in dataset:
103
+ if entry.get(self.config.entry_id) == entry_id:
104
+ return entry
105
+ return None
106
+
107
+ entry1 = find_entry(self.datasets[dataset_name1], entry_id)
108
+ entry2 = find_entry(self.datasets[dataset_name2], entry_id)
109
+ if entry1 is not None:
110
+ reference_image = entry1[self.config.reference_column_name]
111
+ else:
112
+ reference_image = None
113
+
114
+ ret_entry1 = entry1[self.config.genereated_column_name] if entry1 is not None else None
115
+ ret_entry2 = entry2[self.config.genereated_column_name] if entry2 is not None else None
116
+
117
+ return reference_image, ret_entry1, ret_entry2
src/gecora/dataset/video_manager.py ADDED
@@ -0,0 +1,156 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import logging
2
+ from pathlib import Path
3
+ from typing import Any, Dict, List, Optional, Tuple
4
+
5
+ from gecora.dataset.base_manager import BaseDatasetManager, DatasetManagerConfig
6
+
7
+
8
+ logging.basicConfig(
9
+ format="%(asctime)s - %(levelname)s - %(name)s - %(message)s",
10
+ datefmt="%m/%d/%Y %H:%M:%S",
11
+ level=logging.INFO,
12
+ )
13
+
14
+ logger = logging.getLogger(__name__)
15
+
16
+
17
+ class VideoManager(BaseDatasetManager):
18
+ def __init__(self, config: DatasetManagerConfig) -> None:
19
+ """
20
+ Initializes the dataset manager and loads datasets from subdirectories.
21
+
22
+ Args:
23
+ config (DatasetManagerConfig): Configuration.
24
+ """
25
+ super().__init__(config=config)
26
+ self.datasets: Dict[str, Dict[str, Dict[str, str]]] = {}
27
+ self.reference_images: Dict[str, Dict[str, str]] = {}
28
+ self.root_path = Path(self.config.root_path)
29
+ self.common_entry_ids = []
30
+ self.partial_entry_ids = []
31
+ self._load_datasets()
32
+ self._analyze_entry_ids()
33
+ self.logger.info(f"Loaded following datasets: {self.get_dataset_names()}")
34
+ self.logger.info(f"Found {len(self.common_entry_ids)} entries in all datasets.")
35
+ self.logger.info(f"Found {len(self.partial_entry_ids)} entries only in some datasets.")
36
+
37
+ def _collect_reference_images(self, dataset_dir: Path, split: str) -> Dict[str, Dict[str, str]]:
38
+ """Collects all reference images under the split."""
39
+ split_dir = dataset_dir / split
40
+ dataset: Dict[str, Dict[str, str]] = {}
41
+ for source_dir in split_dir.iterdir():
42
+ if source_dir.is_dir():
43
+ for video_dir in source_dir.iterdir():
44
+ if video_dir.is_dir():
45
+ reference_image = video_dir / "reference_image.png"
46
+ if reference_image.is_file():
47
+ source = source_dir.stem
48
+ url = video_dir.stem
49
+ id = source + "_" + url
50
+ dataset[id] = {
51
+ "source": source,
52
+ "url": url,
53
+ "id": id,
54
+ "reference_image": str(reference_image),
55
+ }
56
+ return dataset
57
+
58
+ def _collect_video_dirs(self, dataset_dir: Path, split: str) -> Dict[str, Dict[str, str]]:
59
+ """Collects all video subdirectories under the split."""
60
+ split_dir = dataset_dir / split
61
+ dataset: Dict[str, Dict[str, str]] = {}
62
+ for source_dir in split_dir.iterdir():
63
+ if source_dir.is_dir():
64
+ for video_dir in source_dir.iterdir():
65
+ if video_dir.is_dir():
66
+ video_file = video_dir / "video.mp4"
67
+ if video_file.is_file():
68
+ source = source_dir.stem
69
+ url = video_dir.stem
70
+ id = source + "_" + url
71
+ dataset[id] = {
72
+ "source": source,
73
+ "url": url,
74
+ "id": id,
75
+ "video": str(video_file),
76
+ }
77
+ return dataset
78
+
79
+ def _load_datasets(self) -> None:
80
+ """
81
+ Loads all datasets from subdirectories in the root path.
82
+ """
83
+ for subdir in self.root_path.iterdir():
84
+ if subdir.is_dir():
85
+ dataset_name = subdir.stem
86
+ if dataset_name == self.config.gt_dataset:
87
+ # GT dataset only use reference images
88
+ self.reference_images = self._collect_reference_images(subdir, split=self.config.dataset_split)
89
+ if len(self.reference_images.keys()) == 0:
90
+ raise ValueError(f"No entries found for ground truth dataset {dataset_name} under {subdir}.")
91
+ else:
92
+ try:
93
+ dataset = self._collect_video_dirs(subdir, split=self.config.dataset_split)
94
+ if len(dataset.keys()) == 0:
95
+ self.logger.info(f"No entries found for datataset {dataset}")
96
+ else:
97
+ self.datasets[dataset_name] = dataset
98
+ except Exception as e:
99
+ self.logger.info(f"Skipping {subdir}: {e}")
100
+
101
+ def get_dataset_names(self) -> List[str]:
102
+ """
103
+ Returns a list of all loaded dataset names.
104
+
105
+ Returns:
106
+ List[str]: Names of the datasets.
107
+ """
108
+ return list(self.datasets.keys())
109
+
110
+ def _analyze_entry_ids(self) -> None:
111
+ """
112
+ Analyzes all datasets to find common and partial 'entry_id's.
113
+ Updates `common_entry_ids` and `partial_entry_ids` attributes.
114
+ """
115
+ entry_id_sets: List[set] = []
116
+
117
+ # GT reference images
118
+ entry_id_sets.append(set(self.reference_images.keys()))
119
+ # The other datasets
120
+ for dataset in self.datasets.values():
121
+ entry_ids = set(dataset.keys())
122
+ entry_id_sets.append(entry_ids)
123
+
124
+ if entry_id_sets:
125
+ self.common_entry_ids = list(set.intersection(*entry_id_sets))
126
+ all_entry_ids = set.union(*entry_id_sets)
127
+ self.partial_entry_ids = list(all_entry_ids - set(self.common_entry_ids))
128
+
129
+ def get_entries_by_id(
130
+ self, entry_id: str, dataset_name1: str, dataset_name2: str
131
+ ) -> Tuple[Optional[Any], Optional[Any], Optional[Any]]:
132
+ """
133
+ Retrieves entries from two datasets by a specific 'entry_id'.
134
+
135
+ Args:
136
+ entry_id (str): The 'entry_id' to search for.
137
+ dataset_name1 (str): Name of the first dataset.
138
+ dataset_name2 (str): Name of the second dataset.
139
+
140
+ Returns:
141
+ Tuple[Optional[Any], Optional[Any], Optional[Any]]:
142
+ Reference content, or None if not found.
143
+ Generated entry from dataset_name1 matching the 'entry_id', or None if not found.
144
+ Generated entry from dataset_name2 matching the 'entry_id', or None if not found.
145
+
146
+ Raises:
147
+ ValueError: If one or both dataset names are not found.
148
+ """
149
+ if dataset_name1 not in self.datasets or dataset_name2 not in self.datasets:
150
+ raise ValueError("One or both dataset names not found.")
151
+
152
+ entry1 = self.datasets[dataset_name1][entry_id]
153
+ entry2 = self.datasets[dataset_name2][entry_id]
154
+ reference_image = self.reference_images[entry_id]["reference_image"]
155
+
156
+ return reference_image, entry1["video"], entry2["video"]
src/gecora/dataset/video_pkl_manager.py ADDED
@@ -0,0 +1,207 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import logging
2
+ import os
3
+ import pickle
4
+ from io import BytesIO
5
+ from pathlib import Path
6
+ from typing import Any, Dict, List, Optional, Tuple
7
+
8
+ from PIL import Image
9
+
10
+ from gecora.dataset.base_manager import BaseDatasetManager, DatasetManagerConfig
11
+
12
+
13
+ logger = logging.getLogger(__name__)
14
+
15
+
16
class VideoFrameDataset:
    """
    PyTorch-style dataset for loading video frames and progress values from a
    custom directory structure (dataset_dir/<split>/<source>/<video>/).

    Each item corresponds to one video and returns:
        - frames: List of decoded RGB PIL images
        - progress: List of float progress values

    Args:
        dataset_dir (str): Root directory of the dataset.
        split (str): Either 'train', 'test' or 'eval'.
    """

    def __init__(
        self,
        dataset_dir: str,
        split: str = "test",
    ):
        assert split in ["train", "test", "eval"], "Split must be 'train', 'test' or 'eval'"
        self.dataset_dir = Path(dataset_dir)
        self.split = split
        self.video_dirs: List[Path] = self._collect_video_dirs()
        # Maps "<source>_<video-name>" -> index into self.video_dirs.
        self.video_id_to_idx: Dict[str, int] = {}
        for idx, video_dir in enumerate(self.video_dirs):
            url = video_dir.stem
            source = video_dir.parts[-2]
            # Renamed from `id` to avoid shadowing the builtin.
            video_id = source + "_" + url
            self.video_id_to_idx[video_id] = idx

    def _collect_video_dirs(self) -> List[Path]:
        """Collects all video subdirectories under the split."""
        split_dir = self.dataset_dir / self.split
        video_dirs = []
        for source_dir in split_dir.iterdir():
            if source_dir.is_dir():
                for video_dir in source_dir.iterdir():
                    if video_dir.is_dir():
                        video_dirs.append(video_dir)
        # Sorted so indices are deterministic across runs.
        return sorted(video_dirs)

    def _load_ref_frame(self, video_dir: Path, frame_data: List[bytes]) -> Image.Image:
        """Loads the reference frame from disk, falling back to the last video frame."""
        reference_frame_path = video_dir / "reference_frame.png"
        reference_frame = None
        try:
            reference_frame = Image.open(reference_frame_path)
        except Exception:
            # Best effort: a missing/unreadable reference_frame.png is expected
            # for some videos; we fall back to the last frame below.
            pass

        if reference_frame is None:
            reference_frame = Image.open(BytesIO(frame_data[-1]))

        return reference_frame.convert("RGB")

    def _prepare_frame(self, frame: bytes) -> Image.Image:
        """Decodes one compressed frame into an RGB PIL image."""
        return Image.open(BytesIO(frame)).convert("RGB")

    def __len__(self) -> int:
        return len(self.video_dirs)

    def __getitem__(self, idx: int) -> Any:
        """
        Loads one video entry.

        Returns:
            dict with keys 'video_dir' (str), 'video' (list of PIL images),
            'reference_image' (PIL image) and 'progress_steps' (list of floats).

        Raises:
            ValueError: If the pickled frame/progress data cannot be loaded.
        """
        video_dir = self.video_dirs[idx]
        frame_path = video_dir / "frame_data.pkl"
        progress_path = video_dir / "frame_progress.pkl"

        try:
            with open(frame_path, "rb") as f:
                frame_data: List[bytes] = pickle.load(f)

            reference_image = self._load_ref_frame(video_dir=video_dir, frame_data=frame_data)

            with open(progress_path, "rb") as f:
                frame_progress: List[float] = pickle.load(f)

            frame_img_list = [self._prepare_frame(fd) for fd in frame_data]

        except Exception as e:
            logger.error(f"While loading data for video {video_dir} an error occurred: {e}")
            # Fix: chain the original exception so the root cause stays visible
            # (the previous `raise (ValueError(e))` discarded the context).
            raise ValueError(e) from e

        return {
            "video_dir": str(video_dir),
            "video": frame_img_list,
            "reference_image": reference_image,
            "progress_steps": frame_progress,
        }
104
+
105
+
106
class VideoPklManager(BaseDatasetManager):
    """
    Manages datasets stored as video-pkl subdirectories below a common root path.

    Attributes:
        root_path (str): The root directory containing subfolders with Hugging Face datasets.
        dataset_split (str): If the datasets are a Dict, select specified split.
        datasets (Dict[str, VideoFrameDataset]): Maps dataset names to loaded datasets.
        common_entry_ids (List[str]): 'entry_id's present in every dataset.
        partial_entry_ids (List[str]): 'entry_id's present in only some datasets.
    """

    def __init__(self, config: DatasetManagerConfig) -> None:
        """
        Initializes the manager and loads all datasets found below the root path.

        Args:
            config (DatasetManagerConfig): Configuration.
        """
        super().__init__(config=config)
        self.datasets: Dict[str, VideoFrameDataset] = {}
        self._load_datasets()
        self._analyze_entry_ids()
        self.logger.info(f"Loaded following datasets: {self.get_dataset_names()}")
        self.logger.info(f"Found {len(self.common_entry_ids)} entries in all datasets.")
        self.logger.info(f"Found {len(self.partial_entry_ids)} entries only in some datasets.")

    def _load_datasets(self) -> None:
        """Loads a VideoFrameDataset from every subdirectory of the root path."""
        for subdir in os.listdir(self.config.root_path):
            full_path = os.path.join(self.config.root_path, subdir)
            if not os.path.isdir(full_path):
                continue
            try:
                self.datasets[subdir] = VideoFrameDataset(full_path, split=self.config.dataset_split)
            except Exception as e:
                self.logger.info(f"Skipping {subdir}: {e}")

    def get_dataset_names(self) -> List[str]:
        """
        Returns the names of all loaded datasets.

        Returns:
            List[str]: Names of the datasets.
        """
        return [name for name in self.datasets]

    def _analyze_entry_ids(self) -> None:
        """
        Computes which 'entry_id's are shared by all datasets and which appear
        only in some, updating `common_entry_ids` and `partial_entry_ids`.
        """
        id_sets = [set(dataset.video_id_to_idx.keys()) for dataset in self.datasets.values()]

        if id_sets:
            common = set.intersection(*id_sets)
            self.common_entry_ids = list(common)
            self.partial_entry_ids = list(set.union(*id_sets) - common)

    def get_entries_by_id(
        self, entry_id: str, dataset_name1: str, dataset_name2: str
    ) -> Tuple[Optional[Any], Optional[Any], Optional[Any]]:
        """
        Retrieves entries from two datasets by a specific 'entry_id'.

        Args:
            entry_id (str): The 'entry_id' to search for.
            dataset_name1 (str): Name of the first dataset.
            dataset_name2 (str): Name of the second dataset.

        Returns:
            Tuple[Optional[Any], Optional[Any], Optional[Any]]:
                Reference content, or None if not found.
                Generated entry from dataset_name1 matching the 'entry_id', or None if not found.
                Generated entry from dataset_name2 matching the 'entry_id', or None if not found.

        Raises:
            ValueError: If one or both dataset names are not found.
        """
        if dataset_name1 not in self.datasets or dataset_name2 not in self.datasets:
            raise ValueError("One or both dataset names not found.")

        # Resolve the entry in both datasets; an absent id yields None.
        resolved = []
        for name in (dataset_name1, dataset_name2):
            dataset = self.datasets[name]
            idx = dataset.video_id_to_idx.get(entry_id)
            resolved.append(dataset[idx] if idx is not None else None)
        entry1, entry2 = resolved

        # The reference image is taken from the first dataset's entry.
        reference_image = None if entry1 is None else entry1["reference_image"]
        ret_entry1 = None if entry1 is None else entry1["video"]
        ret_entry2 = None if entry2 is None else entry2["video"]

        return reference_image, ret_entry1, ret_entry2
src/gecora/dataset_converting/__init__.py ADDED
@@ -0,0 +1 @@
 
 
1
+ """Init."""
src/gecora/dataset_converting/video_pkl_to_video.py ADDED
@@ -0,0 +1,67 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import argparse
2
+ from pathlib import Path
3
+
4
+ from tqdm import tqdm
5
+
6
+ from gecora.dataset.video_pkl_manager import VideoFrameDataset
7
+ from gecora.logic.utils import save_video_from_frames
8
+
9
+
10
def convert(
    dataset_dir: str,
    output_dir: str,
    split: str = "test",
    fps: int = 3,
):
    """
    Converts a VideoFrameDataset into video files.

    Args:
        dataset_dir: Path to the dataset directory.
        output_dir: Path where the output videos will be saved.
        split: The dataset split to process (e.g., 'train', 'test', 'eval').
        fps: Frames per second for the output video.
    """
    print(f"Loading dataset from: {dataset_dir} (Split: {split})")
    dataset = VideoFrameDataset(dataset_dir=dataset_dir, split=split)

    num_entries = len(dataset)
    print(f"Found {num_entries} entries. Starting conversion...")

    for idx in tqdm(range(num_entries)):
        entry = dataset[idx]

        # The last two path components encode <source>/<video-name>.
        parts = Path(entry["video_dir"]).parts
        source, video_name = parts[-2], parts[-1]

        # Mirror the source layout below the output directory.
        out_path = Path(output_dir) / split / source / video_name
        out_path.mkdir(parents=True, exist_ok=True)

        # Save reference_image
        entry["reference_image"].save(out_path / "reference_image.png")

        # Save the video
        save_video_from_frames(entry["video"], video_output_path=str(out_path / "video.mp4"), fps=fps)

    print("Conversion complete.")
52
+
53
+
54
if __name__ == "__main__":
    parser = argparse.ArgumentParser(description="Convert VideoFrameDataset entries to MP4 video files.")

    # Required arguments
    parser.add_argument("--dataset_dir", type=str, required=True, help="Path to the root directory of the dataset.")
    parser.add_argument("--output_dir", type=str, required=True, help="Directory where output videos will be saved.")

    # Optional arguments
    parser.add_argument("--split", type=str, default="test", help="Dataset split to process (default: 'test').")
    # Fix: help text previously claimed a default of 2 while the actual default is 3.
    parser.add_argument("--fps", type=int, default=3, help="Frames per second for the output video (default: 3).")

    args = parser.parse_args()

    convert(dataset_dir=args.dataset_dir, output_dir=args.output_dir, split=args.split, fps=args.fps)
src/gecora/db/__init__.py ADDED
@@ -0,0 +1 @@
 
 
1
+ """Init."""
src/gecora/db/__pycache__/__init__.cpython-312.pyc ADDED
Binary file (165 Bytes). View file
 
src/gecora/db/__pycache__/hf_jsonl.cpython-312.pyc ADDED
Binary file (16.4 kB). View file
 
src/gecora/db/__pycache__/sqlite.cpython-312.pyc ADDED
Binary file (14 kB). View file
 
src/gecora/db/hf_jsonl.py ADDED
@@ -0,0 +1,385 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """Class for Database handling using HuggingFace Datasets with JSONL storage."""
2
+
3
+ import json
4
+ import logging
5
+ import os
6
+ from datetime import datetime
7
+ from typing import Any, Dict, List, Optional, Tuple
8
+
9
+ from huggingface_hub import HfApi, hf_hub_download, upload_file
10
+
11
+
12
class HFJsonlDB:
    """
    Handles database operations using HuggingFace datasets with JSONL files.

    Stores data in JSONL files on HuggingFace datasets for persistent storage in HF Spaces.

    Attributes:
        repo_id (str): HuggingFace repository ID (e.g., "username/dataset-name").
        experiment_name (str): Name of the experiment (used to name files).
        token (Optional[str]): HuggingFace API token for authentication.
        users_filename (str): Filename for users JSONL file.
        preferences_filename (str): Filename for preferences JSONL file.
        logger (logging.Logger): Logger instance for logging database operations.
        hf_api (HfApi): HuggingFace API client.
    """

    def __init__(
        self,
        repo_id: str,
        experiment_name: str = "arena",
        token: Optional[str] = None,
        log_folder_path: Optional[str] = None,
    ):
        """
        Initializes the HFJsonlDB instance.

        Args:
            repo_id (str): HuggingFace repository ID (e.g., "username/dataset-name").
            experiment_name (str, optional): Name of the experiment. Defaults to "arena".
            token (Optional[str], optional): HuggingFace API token. If None, uses HF_TOKEN env var.
            log_folder_path (Optional[str], optional): Path for log file. Defaults to current directory.
        """
        self.repo_id = repo_id
        self.experiment_name = experiment_name
        self.token = token or os.environ.get("HF_TOKEN")
        self.users_filename = f"{experiment_name.lower()}_users.jsonl"
        self.preferences_filename = f"{experiment_name.lower()}_preferences.jsonl"

        # Setup logging
        if log_folder_path is None:
            log_folder_path = "."
        self.log_path = os.path.join(log_folder_path, "log_hf_db.txt")
        logging.basicConfig(filename=self.log_path, filemode="a", level=logging.DEBUG)
        self.logger = logging.getLogger(self.__class__.__name__)

        # Initialize HF API
        self.hf_api = HfApi(token=self.token)

        # Cache for data
        self._users_cache: List[Dict[str, Any]] = []
        self._preferences_cache: List[Dict[str, Any]] = []
        self._cache_loaded = False

    def initialize_database(self) -> bool:
        """
        Initializes the database by ensuring the HF dataset exists and files are present.

        Creates empty JSONL files if they don't exist on the HF dataset.

        Returns:
            bool: True if successful, False if an error occurred.
        """
        try:
            # Check if repository exists, if not create it
            try:
                self.hf_api.repo_info(repo_id=self.repo_id, repo_type="dataset")
                self.logger.info(f"Repository {self.repo_id} already exists")
            except Exception:
                self.logger.info(f"Creating repository {self.repo_id}")
                self.hf_api.create_repo(
                    repo_id=self.repo_id,
                    repo_type="dataset",
                    exist_ok=True,
                    private=True,
                )

            # Try to load existing files or create new ones
            self._load_data()

            # If files don't exist, create them
            if not self._cache_loaded:
                self._save_users([])
                self._save_preferences([])
                self._users_cache = []
                self._preferences_cache = []
                self._cache_loaded = True

            self.logger.info("Database initialized successfully")
            return True

        except Exception as e:
            self.logger.error(f"Initializing the database failed with error: {e}")
            return False

    def _load_data(self):
        """Load data from HF dataset into cache."""
        try:
            # Load users
            try:
                users_path = hf_hub_download(
                    repo_id=self.repo_id,
                    filename=self.users_filename,
                    repo_type="dataset",
                    token=self.token,
                )
                with open(users_path, "r") as f:
                    self._users_cache = [json.loads(line) for line in f if line.strip()]
            except Exception as e:
                self.logger.info(f"Users file not found, will create new: {e}")
                self._users_cache = []

            # Load preferences
            try:
                prefs_path = hf_hub_download(
                    repo_id=self.repo_id,
                    filename=self.preferences_filename,
                    repo_type="dataset",
                    token=self.token,
                )
                with open(prefs_path, "r") as f:
                    self._preferences_cache = [json.loads(line) for line in f if line.strip()]
            except Exception as e:
                self.logger.info(f"Preferences file not found, will create new: {e}")
                self._preferences_cache = []

            self._cache_loaded = True

        except Exception as e:
            self.logger.error(f"Error loading data: {e}")
            self._cache_loaded = False

    def _upload_jsonl(self, records: List[Dict[str, Any]], filename: str) -> None:
        """
        Serializes records to a JSONL temp file and uploads it to the HF dataset repo.

        Fix: the previous implementation wrote to a hard-coded "/tmp/<name>" path,
        which is not portable (Windows) and collides when several processes save
        concurrently. tempfile.NamedTemporaryFile gives a unique, portable path,
        and try/finally guarantees cleanup even when the upload fails.
        """
        import tempfile

        with tempfile.NamedTemporaryFile("w", suffix=".jsonl", delete=False) as f:
            for record in records:
                f.write(json.dumps(record) + "\n")
            temp_path = f.name
        try:
            upload_file(
                path_or_fileobj=temp_path,
                path_in_repo=filename,
                repo_id=self.repo_id,
                repo_type="dataset",
                token=self.token,
            )
        finally:
            os.remove(temp_path)

    def _save_users(self, users: List[Dict[str, Any]]):
        """Save users to HF dataset."""
        self._upload_jsonl(users, self.users_filename)

    def _save_preferences(self, preferences: List[Dict[str, Any]]):
        """Save preferences to HF dataset."""
        self._upload_jsonl(preferences, self.preferences_filename)

    def create_user(self, username: str) -> Tuple[Optional[int], str]:
        """
        Creates a new user in the database.

        Args:
            username (str): The username of the new user.

        Returns:
            Tuple[Optional[int], str]:
                The user_id of the newly created user, or None if creation failed.
                If first entry is None then the second contains the exception message.
        """
        try:
            # Reload data to ensure we have latest
            self._load_data()

            # Check if user already exists
            for user in self._users_cache:
                if user["username"] == username:
                    msg = f"User '{username}' already exists."
                    self.logger.warning(msg)
                    return None, msg

            # Generate new user_id
            user_id = max([u["user_id"] for u in self._users_cache], default=0) + 1

            # Create new user
            new_user = {
                "user_id": user_id,
                "username": username,
                "created_at": datetime.now().isoformat(),
            }

            self._users_cache.append(new_user)
            self._save_users(self._users_cache)

            self.logger.info(f"User '{username}' created with user_id {user_id}")
            return user_id, username

        except Exception as e:
            msg = f"Failed to create user '{username}': {e}"
            self.logger.error(msg)
            return None, msg

    def get_user_id_by_username(self, username: str) -> Optional[int]:
        """
        Checks if a username exists in the database and returns the associated user_id.

        Args:
            username (str): The username to look up.

        Returns:
            Optional[int]: The user_id if the username exists, None otherwise.
        """
        try:
            # Reload data to ensure we have latest
            self._load_data()

            for user in self._users_cache:
                if user["username"] == username:
                    return user["user_id"]

            return None

        except Exception as e:
            self.logger.error(f"Error checking username '{username}': {e}")
            return None

    def insert_preference(
        self,
        user_id: int,
        reference_id: str,
        model_left_id: str,
        model_right_id: str,
        preferred_side: str,
    ) -> Tuple[bool, str]:
        """
        Inserts a new preference entry into the database.

        Args:
            user_id (int): ID of the user making the preference.
            reference_id (str): ID of the reference image.
            model_left_id (str): ID of the left model's generated image.
            model_right_id (str): ID of the right model's generated image.
            preferred_side (str): The preferred side ('left', 'right', or 'tie').

        Returns:
            Tuple[bool, str]: True if insertion was successful, False otherwise with a
                string message describing the exception.
        """
        msg = ""
        if preferred_side not in {"left", "right", "tie"}:
            msg = f"Invalid preferred_side value: {preferred_side}"
            self.logger.error(msg)
            return False, msg

        try:
            # Reload data to ensure we have latest
            self._load_data()

            # Generate new preference_id
            preference_id = max([p["preference_id"] for p in self._preferences_cache], default=0) + 1

            # Create new preference
            new_preference = {
                "preference_id": preference_id,
                "user_id": user_id,
                "reference_id": reference_id,
                "model_left_id": model_left_id,
                "model_right_id": model_right_id,
                "preferred_side": preferred_side,
                "timestamp": datetime.now().isoformat(),
            }

            self._preferences_cache.append(new_preference)
            self._save_preferences(self._preferences_cache)

            self.logger.info(f"Preference inserted for user_id {user_id}")
            return True, msg

        except Exception as e:
            msg = f"Failed to insert preference: {e}"
            self.logger.error(msg)
            return False, msg

    def get_all_preferences(self) -> List[Tuple]:
        """
        Retrieves all preference entries from the database.

        Returns:
            List[Tuple]: A list of tuples representing all preference entries.
        """
        try:
            self._load_data()

            # Convert dicts to tuples matching SQLite format
            result = []
            for pref in self._preferences_cache:
                result.append(
                    (
                        pref["preference_id"],
                        pref["user_id"],
                        pref["reference_id"],
                        pref["model_left_id"],
                        pref["model_right_id"],
                        pref["preferred_side"],
                        pref["timestamp"],
                    )
                )
            return result

        except Exception as e:
            self.logger.error(f"Failed to retrieve preferences: {e}")
            return []

    def get_preferences_by_user(self, user_id: int) -> List[Tuple]:
        """
        Retrieves all preference entries for a specific user.

        Args:
            user_id (int): The ID of the user.

        Returns:
            List[Tuple]: A list of tuples representing the user's preference entries.
        """
        try:
            self._load_data()

            # Filter by user_id and convert to tuples
            result = []
            for pref in self._preferences_cache:
                if pref["user_id"] == user_id:
                    result.append(
                        (
                            pref["preference_id"],
                            pref["user_id"],
                            pref["reference_id"],
                            pref["model_left_id"],
                            pref["model_right_id"],
                            pref["preferred_side"],
                            pref["timestamp"],
                        )
                    )

            self.logger.info(f"Retrieved {len(result)} preferences for user_id {user_id}.")
            return result

        except Exception as e:
            self.logger.error(f"Failed to retrieve preferences for user_id {user_id}: {e}")
            return []

    def map_preferences_to_dicts(self, preferences: List[Tuple]) -> List[Dict[str, Any]]:
        """
        Maps a list of preference tuples to a list of dictionaries using the Preference schema.

        Args:
            preferences (List[Tuple]): List of tuples from the Preference table.

        Returns:
            List[Dict[str, Any]]: List of dictionaries with keys matching the Preference schema.
        """
        keys = [
            "preference_id",
            "user_id",
            "reference_id",
            "model_left_id",
            "model_right_id",
            "preferred_side",
            "timestamp",
        ]
        return [dict(zip(keys, row)) for row in preferences]
src/gecora/db/sqlite.py ADDED
@@ -0,0 +1,279 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """Class for Database handling using SQLite."""
2
+
3
+ import logging
4
+ import os
5
+ import sqlite3
6
+ from sqlite3 import Connection
7
+ from typing import Any, Dict, List, Optional, Tuple
8
+
9
+
10
class SQLiteDB:
    """
    Handles SQLite database operations for an image generation comparison experiment.

    Attributes:
        db_folder_path (str): Directory where the SQLite database file is stored.
        experiment_name (str): Name of the experiment (used to name the database file).
        db_filename (str): Filename of the SQLite database.
        db_path (str): Full path to the SQLite database file.
        logger (logging.Logger): Logger instance for logging database operations.
    """

    def __init__(self, db_folder_path: str, experiment_name: str = "arena"):
        """
        Initializes the SQLiteDB instance with dataset and database configuration.

        Args:
            db_folder_path (str): Directory where the SQLite database file is stored.
            experiment_name (str, optional): Name of the experiment. Defaults to "arena".
        """
        self.experiment_name = experiment_name
        self.db_folder_path = db_folder_path
        self.db_filename = f"{experiment_name.lower()}.db"
        self.db_path = os.path.join(db_folder_path, self.db_filename)
        # Fix: create the folder up front. Previously both the log file below and
        # sqlite3.connect() in initialize_database crashed when the folder was
        # missing, because os.makedirs was only called after connecting.
        os.makedirs(db_folder_path, exist_ok=True)
        self.log_path = os.path.join(db_folder_path, "log_db.txt")
        logging.basicConfig(filename=self.log_path, filemode="a", level=logging.DEBUG)
        # Named logger (consistent with HFJsonlDB) instead of the root logger.
        self.logger = logging.getLogger(self.__class__.__name__)
        self.conn: Optional[Connection] = None

    def __del__(self):
        # Best-effort close; the interpreter may already be tearing down.
        if self.conn is not None:
            try:
                self.conn.close()
            except Exception:
                pass

    def _get_conn(self) -> Connection:
        """
        Lazily opens and caches the SQLite connection.

        Fix: check_same_thread=False is now applied consistently. Previously only
        initialize_database used it, so connections created lazily by the other
        methods had different thread-safety behavior.
        """
        if self.conn is None:
            self.conn = sqlite3.connect(self.db_path, check_same_thread=False)
        return self.conn

    def initialize_database(self) -> bool:
        """
        Initializes the SQLite database and creates required tables if they do not exist.

        Tables:
            - User: Stores user information.
            - Preference: Stores user preferences between generated images.

        Returns:
            bool: True if successful, False if an error occurred.
        """
        try:
            db_exists = os.path.exists(self.db_path)
            conn = self._get_conn()
            with conn:  # auto commit
                cursor = conn.cursor()

                if db_exists:
                    self.logger.info(f"Database already exists at {self.db_path}")
                    return True

                self.logger.info(f"Creating new database at {self.db_path}")

                cursor.execute("""
                    CREATE TABLE IF NOT EXISTS User (
                        user_id INTEGER PRIMARY KEY AUTOINCREMENT,
                        username TEXT UNIQUE,
                        created_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP
                    )
                """)

                cursor.execute("""
                    CREATE TABLE IF NOT EXISTS Preference (
                        preference_id INTEGER PRIMARY KEY AUTOINCREMENT,
                        user_id INTEGER,
                        reference_id TEXT,
                        model_left_id TEXT,
                        model_right_id TEXT,
                        preferred_side TEXT CHECK(preferred_side IN ('left', 'right', 'tie')),
                        timestamp TIMESTAMP DEFAULT CURRENT_TIMESTAMP,
                        FOREIGN KEY (user_id) REFERENCES User(user_id)
                    )
                """)
                cursor.close()
        except Exception as e:
            self.logger.info(f"Creating the database failed with following error: {e}")
            return False

        return True

    def create_user(self, username: str) -> Tuple[Optional[int], str]:
        """
        Creates a new user in the database.

        Args:
            username (str): The username of the new user.

        Returns:
            Tuple[Optional[int], str]:
                The user_id of the newly created user, or None if creation failed.
                If first entry is None then the second contains the exception message.
        """
        ret: Optional[int] = None
        msg = ""
        try:
            conn = self._get_conn()
            with conn:
                cursor = conn.cursor()
                cursor.execute(
                    """
                    INSERT INTO User (username) VALUES (?)
                    """,
                    (username,),
                )
                user_id = cursor.lastrowid
                cursor.close()
            self.logger.info(f"User '{username}' created with user_id {user_id}")
            ret = user_id
            msg = username
        except sqlite3.IntegrityError:
            # UNIQUE constraint on username.
            msg = f"User '{username}' already exists."
            self.logger.warning(msg)
        except Exception as e:
            msg = f"Failed to create user '{username}': {e}"
            self.logger.error(msg)
        return ret, msg

    def get_user_id_by_username(self, username: str) -> Optional[int]:
        """
        Checks if a username exists in the database and returns the associated user_id.

        Args:
            username (str): The username to look up.

        Returns:
            Optional[int]: The user_id if the username exists, None otherwise.
        """
        try:
            cursor = self._get_conn().cursor()
            cursor.execute(
                """
                SELECT user_id FROM User WHERE username = ?
                """,
                (username,),
            )
            result = cursor.fetchone()
            cursor.close()

            return result[0] if result else None
        except Exception as e:
            self.logger.error(f"Error checking username '{username}': {e}")
            return None

    def insert_preference(
        self, user_id: int, reference_id: str, model_left_id: str, model_right_id: str, preferred_side: str
    ) -> Tuple[bool, str]:
        """
        Inserts a new preference entry into the database.

        Args:
            user_id (int): ID of the user making the preference.
            reference_id (str): ID of the reference image.
            model_left_id (str): ID of the left model's generated image.
            model_right_id (str): ID of the right model's generated image.
            preferred_side (str): The preferred side ('left', 'right', or 'tie').

        Returns:
            Tuple[bool, str]: True if insertion was successful, False otherwise with a
                string message describing the exception.
        """
        msg = ""
        # Validate in Python too, so the caller gets a clear message instead of
        # relying solely on the table's CHECK constraint.
        if preferred_side not in {"left", "right", "tie"}:
            msg = f"Invalid preferred_side value: {preferred_side}"
            self.logger.error(msg)
            return False, msg

        try:
            conn = self._get_conn()
            with conn:
                cursor = conn.cursor()
                cursor.execute(
                    """
                    INSERT INTO Preference (
                        user_id, reference_id, model_left_id, model_right_id, preferred_side
                    ) VALUES (?, ?, ?, ?, ?)
                    """,
                    (user_id, reference_id, model_left_id, model_right_id, preferred_side),
                )
                cursor.close()
            self.logger.info(f"Preference inserted for user_id {user_id}")
            return True, msg
        except Exception as e:
            msg = f"Failed to insert preference: {e}"
            self.logger.error(msg)
            return False, msg

    def get_all_preferences(self) -> List[Tuple]:
        """
        Retrieves all preference entries from the database.

        Returns:
            List[Tuple]: A list of tuples representing all preference entries.
        """
        preferences = []
        try:
            conn = self._get_conn()
            with conn:
                cursor = conn.cursor()
                cursor.execute("SELECT * FROM Preference")
                preferences = cursor.fetchall()
                cursor.close()
        except Exception as e:
            self.logger.error(f"Failed to retrieve preferences: {e}")
        return preferences

    def get_preferences_by_user(self, user_id: int) -> List[Tuple]:
        """
        Retrieves all preference entries for a specific user.

        Args:
            user_id (int): The ID of the user.

        Returns:
            List[Tuple]: A list of tuples representing the user's preference entries.
        """
        preferences = []
        try:
            conn = self._get_conn()
            with conn:
                cursor = conn.cursor()
                cursor.execute("SELECT * FROM Preference WHERE user_id = ?", (user_id,))
                preferences = cursor.fetchall()
                cursor.close()
            self.logger.info(f"Retrieved {len(preferences)} preferences for user_id {user_id}.")
        except Exception as e:
            self.logger.error(f"Failed to retrieve preferences for user_id {user_id}: {e}")
        return preferences

    def map_preferences_to_dicts(self, preferences: List[Tuple]) -> List[Dict[str, Any]]:
        """
        Maps a list of preference tuples to a list of dictionaries using the Preference schema.

        Args:
            preferences (List[Tuple]): List of tuples from the Preference table.

        Returns:
            List[Dict[str, Any]]: List of dictionaries with keys matching the Preference schema.
        """
        keys = [
            "preference_id",
            "user_id",
            "reference_id",
            "model_left_id",
            "model_right_id",
            "preferred_side",
            "timestamp",
        ]
        return [dict(zip(keys, row)) for row in preferences]
src/gecora/logging/__init__.py ADDED
@@ -0,0 +1 @@
 
 
1
+ """Init."""
src/gecora/logging/__pycache__/__init__.cpython-312.pyc ADDED
Binary file (170 Bytes). View file
 
src/gecora/logging/__pycache__/logger.cpython-312.pyc ADDED
Binary file (1.01 kB). View file
 
src/gecora/logging/logger.py ADDED
@@ -0,0 +1,16 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import logging
2
+
3
+
4
def setup_file_logger(name: str, log_file: str, level=logging.INFO) -> logging.Logger:
    """Return a named logger that writes timestamped records to ``log_file``.

    The level is (re)applied on every call, but the file handler is attached
    only the first time, so repeated calls never duplicate log output.
    """
    logger = logging.getLogger(name)
    logger.setLevel(level)

    if logger.handlers:
        # Already configured by a previous call — reuse as-is.
        return logger

    handler = logging.FileHandler(log_file)
    handler.setFormatter(logging.Formatter("%(asctime)s - %(name)s - %(levelname)s - %(message)s"))
    logger.addHandler(handler)
    return logger
src/gecora/logic/__init__.py ADDED
@@ -0,0 +1 @@
 
 
1
+ """Init."""
src/gecora/logic/__pycache__/__init__.cpython-312.pyc ADDED
Binary file (168 Bytes). View file
 
src/gecora/logic/__pycache__/base.cpython-312.pyc ADDED
Binary file (2.81 kB). View file
 
src/gecora/logic/__pycache__/loomis_painter.cpython-312.pyc ADDED
Binary file (8.98 kB). View file
 
src/gecora/logic/__pycache__/utils.cpython-312.pyc ADDED
Binary file (1.9 kB). View file
 
src/gecora/logic/base.py ADDED
@@ -0,0 +1,53 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from typing import Dict, Optional, Tuple
2
+
3
+ from PIL import Image
4
+
5
+
6
class LogicBase:
    """
    Abstract base for a ranking app's logic layer.

    Concrete apps subclass this and wire together the dataset, database,
    ranking system and UI. The attributes below are expected to be provided
    by subclasses — this base class itself stores nothing:

    Attributes:
        root_path (str): Root directory for the application.
        dataset_path (Optional[str]): Path to the dataset, if provided.
        ranking_system: Instance of the model ranking system.
        gecora_db: Database integration (e.g. an SQLite or HF JSONL backend).
        dataset_manager: Dataset manager integration.
        itov_app: UI app integration.
    """

    def __init__(self, root_path: str, dataset_path: Optional[str] = None, dataset_split: str = "test"):
        """Initializes Base Class.

        Intentionally a no-op: subclasses perform all setup themselves and
        are expected to accept at least these constructor arguments.
        """
        pass

    def launch(self):
        """Launches the App."""
        raise NotImplementedError("This method should be implemented by the child class.")

    def set_username(self, username: str) -> Optional[int]:
        """
        Creates the User in the database if not already present.

        Returns:
            Optional[int]: The user's ID, or None if creation failed.
        """
        raise NotImplementedError("This method should be implemented by the child class.")

    def set_preference(
        self, user_id: int, reference_id: str, model_left_id: str, model_right_id: str, preferred_side: str
    ) -> Tuple[bool, str]:
        """
        Sets a new preference entry.

        Returns:
            Tuple[bool, str]: Success flag and a status message.
        """
        raise NotImplementedError("This method should be implemented by the child class.")

    def get_next_comparison(
        self, user_id: int
    ) -> Optional[Tuple[Tuple[str, str, str], Tuple[Image.Image, Dict, Dict], Tuple[int, int]]]:
        """
        Selects the next model pair for comparison.

        Args:
            user_id (int): ID of the current user.

        Returns:
            Optional[Tuple[Tuple[str, str, str], Tuple[Image.Image, Dict, Dict], Tuple[int, int]]]:
            ((reference_id, model_left_id, model_right_id),
            (reference_image, left_entry, right_entry),
            (num_preferences, total_num_comparison)), or None when no
            comparison is available.
        """
        raise NotImplementedError("This method should be implemented by the child class.")
src/gecora/logic/loomis_painter.py ADDED
@@ -0,0 +1,205 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import logging
2
+ import os
3
+ from typing import Any, Optional, Tuple, Union
4
+
5
+ from gecora.app.i_to_v_app import ItoVApp
6
+ from gecora.dataset.base_manager import BaseDatasetManager, DatasetManagerConfig
7
+ from gecora.dataset.video_manager import VideoManager
8
+ from gecora.db.hf_jsonl import HFJsonlDB
9
+ from gecora.db.sqlite import SQLiteDB
10
+ from gecora.logic.base import LogicBase
11
+ from gecora.logic.utils import save_video_from_frames
12
+ from gecora.ranking.ranking_system import RankingSystem
13
+
14
+
15
class LoomisPainterApp(LogicBase):
    """
    Loomis Painter Ranking App.

    Wires together the video dataset manager, the preference database, the
    ELO ranking system and the image-to-video comparison UI for the
    step-by-step painting task.

    Attributes:
        root_path (str): Root directory for the application.
        dataset_path (Optional[str]): Path to the dataset, if provided.
        force_model_id (Optional[str]): Model id that must appear in every comparison, if set.
        desired_num_selections (Optional[int]): Minimum number of comparisons a user should make.
        ranking_system (RankingSystem): Instance of the model ranking system.
        gecora_db (Union[SQLiteDB, HFJsonlDB]): Database instance (SQLite or HF JSONL).
        dataset_manager (BaseDatasetManager): Manager serving reference images and videos.
        itov_app (ItoVApp): UI application driving the comparisons.
    """

    def __init__(
        self,
        root_path: str,
        dataset_path: Optional[str] = None,
        use_hf_db: bool = True,
        hf_repo_id: Optional[str] = None,
        hf_token: Optional[str] = None,
        force_model_id: Optional[str] = None,
        desired_num_selections: Optional[int] = 25,
    ):
        """Initializes LoomisPainterApp.

        Args:
            root_path (str): The root directory containing subfolders with datasets.
            dataset_path (Optional[str]): Path to the dataset; falls back to root_path if None.
            use_hf_db (bool): Whether to use HuggingFace JSONL database instead of SQLite.
            hf_repo_id (Optional[str]): HF repository ID (required if use_hf_db=True).
            hf_token (Optional[str]): HF API token (if not provided, uses HF_TOKEN env var).
            force_model_id (Optional[str]): If set select preferences where force_model_id is always included.
            desired_num_selections (Optional[int]): The desired number of comparison the user should at least select.

        Raises:
            ValueError: If use_hf_db is True and no hf_repo_id is given.
        """
        self.root_path = root_path
        self.dataset_path = dataset_path
        self.force_model_id = force_model_id
        self.desired_num_selections = desired_num_selections

        # Core logic
        self.ranking_system = RankingSystem()

        # Default the dataset location to the root path.
        if dataset_path is None:
            dataset_path = root_path

        self.gecora_db: Union[HFJsonlDB, SQLiteDB]
        # Initialize database based on configuration
        if use_hf_db:
            if hf_repo_id is None:
                raise ValueError("hf_repo_id must be provided when use_hf_db=True")
            self.gecora_db = HFJsonlDB(
                repo_id=hf_repo_id,
                experiment_name="arena",
                token=hf_token,
                log_folder_path=root_path,
            )
        else:
            self.gecora_db = SQLiteDB(
                db_folder_path=root_path,
            )

        self.gecora_db.initialize_database()

        # NOTE(review): "genereated_column_name" mirrors the (misspelled) field
        # name declared by DatasetManagerConfig and must stay in sync with it.
        dm_conf = DatasetManagerConfig(
            root_path=dataset_path,
            reference_column_name="reference_image",
            genereated_column_name="video",
            logging_path=root_path,
        )
        self.dataset_manager: BaseDatasetManager = VideoManager(config=dm_conf)
        task_desc: str = """
        ### 🎨 Choose the Best Step-By-Step Painting video
        You will be shown a **reference painting** in the center, with two generated videos (Left and Right) that depict the painting process.
        Your goal is to decide which step-by-step process is better based on the criteria below.

        Please consider the following when making your decision:
        * **Process Completeness:** Choose the video that best captures the entire painting process, from start to the finished work.
        * **Visual Fidelity:** The final frame of the video should match the reference painting as closely as possible.
        * **Ignore Duration:** The videos may be of different lengths. Please do not let the video duration influence your decision.

        #### How to Vote
        * Click **←** if the **Left** video is better.
        * Click **→** if the **Right** video is better.
        * Click **Tie** if neither video is clearly superior.
        """
        ref_img_label: str = "Reference Painting"
        left_media_label: str = "Left Painting Process"
        right_media_label: str = "Right Painting Process"
        self.itov_app: ItoVApp = ItoVApp(
            logic_class=self,
            task_desc=task_desc,
            ref_img_label=ref_img_label,
            left_media_label=left_media_label,
            right_media_label=right_media_label,
            desired_num_selections=self.desired_num_selections,
        )

        self.log_path = os.path.join(root_path, "log.txt")
        logging.basicConfig(filename=self.log_path, filemode="a", level=logging.DEBUG)
        self.logger = logging.getLogger()

    def launch(self):
        """Launches the App."""
        self.itov_app.launch()

    def set_username(self, username: str) -> Optional[int]:
        """
        Creates the User in the database if not already present.

        Args:
            username (str): Display name of the user.

        Returns:
            Optional[int]: The (existing or newly created) user's ID, or None on failure.
        """
        user_id = self.gecora_db.get_user_id_by_username(username=username)
        if user_id is None:
            user_id, msg = self.gecora_db.create_user(username=username)
            if user_id is None:
                self.logger.error(f"Error while creating user with username {username}: {msg}")
                return None
        return user_id

    def set_preference(
        self, user_id: int, reference_id: str, model_left_id: str, model_right_id: str, preferred_side: str
    ) -> Tuple[bool, str]:
        """
        Sets a new preference entry.

        Args:
            user_id (int): ID of the voting user.
            reference_id (str): ID of the reference entry being compared.
            model_left_id (str): Model shown on the left.
            model_right_id (str): Model shown on the right.
            preferred_side (str): Which side the user preferred.

        Returns:
            Tuple[bool, str]: Success flag and a status message from the database.
        """
        return self.gecora_db.insert_preference(
            user_id=user_id,
            reference_id=reference_id,
            model_left_id=model_left_id,
            model_right_id=model_right_id,
            preferred_side=preferred_side,
        )

    def run_ranking_update(self):
        """
        Loads all stored preferences from the database and updates model rankings.
        """
        # Bug fix: the previous implementation called self.load_preferences(),
        # a method that exists neither on this class nor on LogicBase and
        # would raise AttributeError. Fetch the rows from the database backend
        # and map them to dicts, mirroring get_next_comparison().
        preference_rows = self.gecora_db.get_all_preferences()
        preferences = self.gecora_db.map_preferences_to_dicts(preferences=preference_rows)
        self.ranking_system.update_rankings(preferences)

    def get_next_comparison(
        self, user_id: int
    ) -> Optional[Tuple[Tuple[str, str, str], Tuple[Any, Any, Any], Tuple[int, int]]]:
        """
        Selects the next model pair for comparison.

        Args:
            user_id (int): ID of the current user.

        Returns:
            Optional[Tuple[Tuple[str, str, str], Tuple[Any, Any, Any], Tuple[int, int]]]:
            ((reference_id, model_left_id, model_right_id),
            (reference_image, left_entry, right_entry),
            (num_preferences, total_num_comparison)), or None when no valid
            comparison can be produced.
        """
        reference_ids = self.dataset_manager.common_entry_ids
        model_ids = self.dataset_manager.get_dataset_names()
        # Having a data base call here every time may not be ideal. The assumption is that one user
        # does have a small number of set preferences, therefore runtime is still good.
        preferences_tuple = self.gecora_db.get_preferences_by_user(user_id=user_id)
        # Mapping from List[Tuple] to List[Dict[str, Any]]
        preferences = self.gecora_db.map_preferences_to_dicts(preferences=preferences_tuple)
        selected = self.ranking_system.select_next_comparison(
            preferences, reference_ids, model_ids, force_model_id=self.force_model_id
        )
        num_preferences, total_num_comparison = self.ranking_system.calculate_progress(
            preferences, reference_ids, model_ids, force_model_id=self.force_model_id
        )
        try:
            if selected is None:
                self.logger.warning(f"Was not able to get next comparison for user {user_id}")
                return None
            reference_id, model_left_id, model_right_id = selected
            reference_image, left_entry, right_entry = self.dataset_manager.get_entries_by_id(
                entry_id=reference_id, dataset_name1=model_left_id, dataset_name2=model_right_id
            )
            if reference_image is None or left_entry is None or right_entry is None:
                return None

            # Frame lists are encoded to temporary video files for the UI.
            if isinstance(left_entry, list):
                left_entry = save_video_from_frames(
                    left_entry, video_output_path=self.itov_app.tmp_video_path_left, fps=2
                )
            if isinstance(right_entry, list):
                right_entry = save_video_from_frames(
                    right_entry, video_output_path=self.itov_app.tmp_video_path_right, fps=2
                )
        except Exception as e:
            self.logger.error(f"Error in get_next_comparison({user_id}) of LoomisPainter: {e}")
            return None
        return (
            (reference_id, model_left_id, model_right_id),
            (reference_image, left_entry, right_entry),
            (num_preferences, total_num_comparison),
        )
src/gecora/logic/utils.py ADDED
@@ -0,0 +1,37 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import os
2
+ import tempfile
3
+ from typing import List, Optional
4
+
5
+ import imageio.v3 as iio
6
+ from PIL import Image
7
+
8
+
9
def create_temp_file(suffix: str = ".mp4") -> str:
    """Create an empty temporary file on disk and return its path.

    The file is deliberately not auto-deleted; callers own cleanup
    (see ``cleanup`` / ``cleanup_list``).
    """
    with tempfile.NamedTemporaryFile(suffix=suffix, delete=False) as handle:
        # Exiting the ``with`` closes the handle but keeps the file on disk.
        return handle.name
16
+
17
+
18
def cleanup(temp_file_path: str):
    """Delete the file at ``temp_file_path``; missing paths are a no-op."""
    path_exists = os.path.exists(temp_file_path)
    if path_exists:
        os.remove(temp_file_path)
22
+
23
+
24
def cleanup_list(temp_file_path_list: List[str]):
    """Delete every existing file in ``temp_file_path_list``; missing paths are skipped."""
    for path in temp_file_path_list:
        if os.path.exists(path):
            os.remove(path)
29
+
30
+
31
def save_video_from_frames(video: List[Image.Image], video_output_path: str, fps: int = 2) -> Optional[str]:
    """Encode a list of PIL frames as an H.264 video at ``video_output_path``.

    Args:
        video: Frames to encode, in playback order.
        video_output_path: Destination file path (e.g. a ``.mp4`` file).
        fps: Frames per second of the resulting video.

    Returns:
        The output path on success, or ``None`` if encoding failed — all
        exceptions are swallowed by design (best-effort encoding).
    """
    try:
        iio.imwrite(video_output_path, video, fps=fps, codec="libx264")
        return video_output_path
    except Exception:
        return None
src/gecora/py.typed ADDED
File without changes
src/gecora/ranking/__init__.py ADDED
@@ -0,0 +1 @@
 
 
1
+ """Init."""
src/gecora/ranking/__pycache__/__init__.cpython-312.pyc ADDED
Binary file (170 Bytes). View file
 
src/gecora/ranking/__pycache__/ranking_system.cpython-312.pyc ADDED
Binary file (5.58 kB). View file
 
src/gecora/ranking/ranking_system.py ADDED
@@ -0,0 +1,178 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import itertools
2
+ import random
3
+ from collections import defaultdict
4
+ from typing import Dict, List, Optional, Set, Tuple
5
+
6
+
7
class RankingSystem:
    """ELO-based ranking over pairwise model preferences."""

    def __init__(self, base_rating: int = 1500, k: int = 32):
        """
        Initializes the ranking system.

        Args:
            base_rating (int): Initial ELO rating assigned to every model.
            k (int): ELO adjustment factor per match.
        """
        self.base_rating = base_rating
        self.k = k
        # Unseen models default to the base rating on first access.
        self.elo_ratings: Dict[str, float] = defaultdict(lambda: base_rating)
        self.win_counts: Dict[str, int] = defaultdict(int)
        self.match_counts: Dict[str, int] = defaultdict(int)

    def update_rankings(self, preferences: List[Dict[str, str]]) -> None:
        """
        Folds a batch of preference records into the ELO ratings and counters.

        Args:
            preferences (List[Dict[str, str]]): Records carrying
                "model_left_id", "model_right_id" and "preferred_side" keys.
        """

        def _expected(mine: float, theirs: float) -> float:
            # Standard ELO expectation that "mine" beats "theirs".
            return 1 / (1 + 10 ** ((theirs - mine) / 400))

        for record in preferences:
            left_id = record["model_left_id"]
            right_id = record["model_right_id"]
            outcome = record["preferred_side"]

            left_rating = self.elo_ratings[left_id]
            right_rating = self.elo_ratings[right_id]

            left_expected = _expected(left_rating, right_rating)
            right_expected = _expected(right_rating, left_rating)

            if outcome == "left":
                left_score, right_score = 1.0, 0.0
                self.win_counts[left_id] += 1
            elif outcome == "right":
                left_score, right_score = 0.0, 1.0
                self.win_counts[right_id] += 1
            else:
                # A tie gives half a point to each side and no win to either.
                left_score = right_score = 0.5

            self.elo_ratings[left_id] += self.k * (left_score - left_expected)
            self.elo_ratings[right_id] += self.k * (right_score - right_expected)

            self.match_counts[left_id] += 1
            self.match_counts[right_id] += 1

    def get_elo_ratings(self) -> Dict[str, float]:
        """Returns a plain-dict snapshot of the current ELO ratings."""
        return dict(self.elo_ratings)

    def get_winrates(self) -> Dict[str, float]:
        """Returns wins / matches per model (0.0 for models with no matches)."""
        winrates: Dict[str, float] = {}
        for model in self.match_counts:
            played = self.match_counts[model]
            winrates[model] = self.win_counts[model] / played if played > 0 else 0.0
        return winrates

    def calculate_progress(
        self,
        preferences: List[Dict[str, str]],
        reference_ids: List[str],
        model_ids: List[str],
        force_model_id: Optional[str] = None,
    ) -> Tuple[int, int]:
        """
        Calculates how many unique comparisons are done and how many exist in total.

        Args:
            preferences: Preference dicts (assumed to belong to a single user).
            reference_ids: All possible reference IDs.
            model_ids: All possible model IDs.
            force_model_id: If set, only pairs involving this model count.

        Returns:
            Tuple[int, int]: (seen_count, total_possible_count).
        """
        # Preferences are assumed unique, so their count is the progress so far.
        done = len(preferences)

        ref_count = len(reference_ids)
        model_count = len(model_ids)

        # Degenerate inputs allow no comparisons at all.
        if ref_count == 0 or model_count < 2:
            return done, 0

        if force_model_id:
            # Only pairs containing the forced model contribute to the total.
            total = ref_count * (model_count - 1) if force_model_id in model_ids else 0
        else:
            # n-choose-2 unique model pairs per reference.
            total = ref_count * (model_count * (model_count - 1) // 2)

        return done, total

    def select_next_comparison(
        self,
        preferences: List[Dict[str, str]],
        reference_ids: List[str],
        model_ids: List[str],
        force_model_id: Optional[str],
    ) -> Optional[Tuple[str, str, str]]:
        """
        Picks a (reference, model pair) combination the user has not voted on yet.

        The same unordered (reference_id, {model_a, model_b}) combination is
        never offered twice; selection order is randomized.

        Args:
            preferences (List[Dict[str, str]]): Existing preferences of the user.
            reference_ids (List[str]): List of possible reference IDs.
            model_ids (List[str]): List of model IDs to choose from.
            force_model_id (Optional[str]): If set always includes this model_id in the comparison.

        Returns:
            Optional[Tuple[str, str, str]]: (reference_id, model_left_id, model_right_id),
            or None if every combination has already been compared.
        """
        # Track unordered pairs via frozenset so left/right order is irrelevant.
        already_seen: Set[Tuple[str, frozenset]] = set()
        for pref in preferences:
            already_seen.add(
                (pref["reference_id"], frozenset({pref["model_left_id"], pref["model_right_id"]}))
            )

        # Shuffle copies so the inputs are never mutated.
        refs = list(reference_ids)
        random.shuffle(refs)
        models = list(model_ids)
        random.shuffle(models)

        if force_model_id:
            if force_model_id not in model_ids:
                return None

            rivals = [model for model in models if model != force_model_id]
            for ref in refs:
                for rival in rivals:
                    pair = frozenset({force_model_id, rival})
                    if (ref, pair) in already_seen:
                        continue
                    # Randomize left/right placement for UI neutrality.
                    if random.random() > 0.5:
                        return (ref, force_model_id, rival)
                    return (ref, rival, force_model_id)
            return None

        # All unordered model pairs, shuffled so no pair is systematically first.
        candidate_pairs = list(itertools.combinations(models, 2))
        random.shuffle(candidate_pairs)

        for ref in refs:
            for first, second in candidate_pairs:
                if (ref, frozenset({first, second})) not in already_seen:
                    return (ref, first, second)

        return None