davda54 committed on
Commit
086869b
·
verified ·
1 Parent(s): 652a95d

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +204 -51
app.py CHANGED
@@ -7,8 +7,10 @@ import random
7
  from datetime import datetime
8
  from typing import Dict, List, Tuple
9
  import hashlib
10
- from datasets import load_dataset
11
  import itertools
 
 
 
12
 
13
  from collections.abc import Iterable
14
 
@@ -215,6 +217,10 @@ TODO
215
  **Code or mathematical expressions**: If responses contain code snippets or mathematical expressions, evaluate only the fluency of the natural language portions.
216
  """
217
 
 
 
 
 
218
  HF_TOKEN = os.environ.get("HF_TOKEN")
219
 
220
  # Model names for the three responses
@@ -289,55 +295,109 @@ class AnnotationManager:
289
  def __init__(self):
290
  self.annotations = {} # Store annotations by user_id
291
  self.user_states = {} # Track each user's progress
 
 
 
 
 
292
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
293
  def get_user_seed(self, user_id: str) -> int:
294
  """Generate consistent seed for user"""
295
- return int(hashlib.md5(user_id.encode()).hexdigest(), 16) % 100000
296
-
297
  def get_user_samples(self, user_id: str) -> List[Dict]:
298
  """Get shuffled samples for user based on their ID"""
299
  seed = self.get_user_seed(user_id)
300
- samples = DATASET_SAMPLES.copy() # Use loaded dataset
301
  random.Random(seed).shuffle(samples)
302
- samples = [
303
- sample if random.Random(seed + i).randint(0, 1) == 0 else swap_sample(sample)
304
- for i, sample in enumerate(samples)
305
- ]
306
  return samples
307
 
308
- def save_annotation(self, user_id: str, sample_id: str, choice: str,
309
- model_a: str = None, model_b: str = None):
310
- """Save user's annotation with model information"""
311
- if user_id not in self.annotations:
312
- self.annotations[user_id] = []
313
-
314
- annotation = {
315
- "user_id": user_id,
316
- "sample_id": sample_id,
317
- "choice": choice,
318
- "model_a": model_a,
319
- "model_b": model_b,
320
- "timestamp": datetime.now().isoformat()
321
- }
322
-
323
- self.annotations[user_id].append(annotation)
324
-
325
- # Update user state
326
- if user_id in self.user_states:
327
- self.user_states[user_id]["annotations"].append(sample_id)
328
- self.user_states[user_id]["current_index"] += 1
329
-
330
- # In production, save to HuggingFace dataset here
331
- print(f"Saved annotation: {annotation}")
332
-
333
- def get_user_progress(self, user_id: str) -> Dict:
334
- """Get user's annotation progress"""
335
- if user_id not in self.annotations:
336
- return {"completed": 0, "total": len(DATASET_SAMPLES)}
337
-
338
- completed = len(self.annotations[user_id])
339
- return {"completed": completed, "total": len(DATASET_SAMPLES)}
340
-
341
  def get_next_sample(self, user_id: str) -> Tuple[Dict, int, int]:
342
  """Get next unannotated sample for user"""
343
  if user_id not in self.user_states:
@@ -349,12 +409,13 @@ class AnnotationManager:
349
  samples = self.get_user_samples(user_id)
350
  state = self.user_states[user_id]
351
 
 
 
 
352
  # Find next unannotated sample
353
- while state["current_index"] < len(samples):
354
- sample = samples[state["current_index"]]
355
  if not self.is_annotated(user_id, sample["id"]):
356
- return sample, state["current_index"] + 1, len(samples)
357
- state["current_index"] += 1
358
 
359
  # All samples annotated
360
  return None, len(samples), len(samples)
@@ -364,6 +425,94 @@ class AnnotationManager:
364
  if user_id not in self.annotations:
365
  return False
366
  return any(ann["sample_id"] == sample_id for ann in self.annotations[user_id])
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
367
 
368
 
369
  # Initialize manager
@@ -406,7 +555,8 @@ def login(user_id: str) -> Tuple:
406
  gr.update(value=sample["prompt"]), # prompt
407
  gr.update(value=sample["response_a"]), # response_a
408
  gr.update(value=sample["response_b"]), # response_b
409
- gr.update(value=f"Progress: {current}/{total}") # progress
 
410
  )
411
 
412
  def annotate(choice: str, user_id: str) -> Tuple:
@@ -429,13 +579,13 @@ def annotate(choice: str, user_id: str) -> Tuple:
429
  "b_better": "B is more fluent",
430
  "equal": "Equally fluent"
431
  }
432
- # Save with model information
 
433
  manager.save_annotation(
434
  user_id,
435
  sample["id"],
436
  choice_map[choice],
437
- model_a=sample.get("model_a"),
438
- model_b=sample.get("model_b")
439
  )
440
 
441
  # Get next sample
@@ -450,12 +600,15 @@ def annotate(choice: str, user_id: str) -> Tuple:
450
  gr.update(value="All annotations complete!", visible=True) # status
451
  )
452
 
 
 
 
453
  return (
454
  gr.update(value=next_sample["prompt"]), # prompt
455
  gr.update(value=next_sample["response_a"]), # response_a
456
  gr.update(value=next_sample["response_b"]), # response_b
457
- gr.update(value=f"Progress: {current}/{total} | Comparing: {sample.get('model_a', 'A')} vs {sample.get('model_b', 'B')}"), # progress
458
- # gr.update(value=f"Progress: {current}/{total}"), # progress
459
  gr.update(value="Annotation saved!", visible=True) # status
460
  )
461
 
 
7
  from datetime import datetime
8
  from typing import Dict, List, Tuple
9
  import hashlib
 
10
  import itertools
11
+ from datasets import load_dataset, Dataset, DatasetDict
12
+ from huggingface_hub import HfApi, create_repo, repo_exists
13
+ import threading
14
 
15
  from collections.abc import Iterable
16
 
 
217
  **Code or mathematical expressions**: If responses contain code snippets or mathematical expressions, evaluate only the fluency of the natural language portions.
218
  """
219
 
220
+ # Configuration for the output dataset
221
+ OUTPUT_DATASET_NAME = "ltg/fluency-annotations" # Change to your desired dataset name
222
+ OUTPUT_DATASET_PRIVATE = True # Keep the annotations dataset private
223
+
224
  HF_TOKEN = os.environ.get("HF_TOKEN")
225
 
226
  # Model names for the three responses
 
295
  def __init__(self):
296
  self.annotations = {} # Store annotations by user_id
297
  self.user_states = {} # Track each user's progress
298
+ self.annotation_cache = [] # Cache for batch uploads
299
+ self.lock = threading.Lock() # Thread safety for annotations
300
+
301
+ # Initialize or load existing annotations dataset
302
+ self.init_annotations_dataset()
303
 
304
+ def init_annotations_dataset(self):
305
+ """Initialize or load existing annotations from HuggingFace"""
306
+ try:
307
+ if HF_TOKEN:
308
+ api = HfApi(token=HF_TOKEN)
309
+
310
+ # Check if dataset exists, if not create it
311
+ if not repo_exists(OUTPUT_DATASET_NAME, repo_type="dataset", token=HF_TOKEN):
312
+ print(f"Creating new dataset: {OUTPUT_DATASET_NAME}")
313
+ create_repo(
314
+ OUTPUT_DATASET_NAME,
315
+ repo_type="dataset",
316
+ private=OUTPUT_DATASET_PRIVATE,
317
+ token=HF_TOKEN
318
+ )
319
+ # Create empty dataset structure
320
+ self.push_empty_dataset()
321
+ else:
322
+ # Load existing annotations
323
+ print(f"Loading existing annotations from {OUTPUT_DATASET_NAME}")
324
+ self.load_existing_annotations()
325
+ else:
326
+ print("Warning: No HF_TOKEN found. Annotations will only be saved locally.")
327
+ except Exception as e:
328
+ print(f"Error initializing annotations dataset: {e}")
329
+ print("Continuing with local-only mode")
330
+
331
+ def push_empty_dataset(self):
332
+ """Create and push empty dataset structure"""
333
+ try:
334
+ empty_data = {
335
+ "user_id": [],
336
+ "sample_id": [],
337
+ "original_id": [],
338
+ "model_a": [],
339
+ "model_b": [],
340
+ "choice": [],
341
+ "prompt": [],
342
+ "response_a": [],
343
+ "response_b": [],
344
+ "dataset": [],
345
+ "timestamp": []
346
+ }
347
+
348
+ dataset = Dataset.from_dict(empty_data)
349
+ dataset.push_to_hub(OUTPUT_DATASET_NAME, token=HF_TOKEN, private=OUTPUT_DATASET_PRIVATE)
350
+ print(f"Created empty dataset at {OUTPUT_DATASET_NAME}")
351
+ except Exception as e:
352
+ print(f"Error creating empty dataset: {e}")
353
+
354
+ def load_existing_annotations(self):
355
+ """Load existing annotations from HuggingFace dataset"""
356
+ try:
357
+ dataset = load_dataset(OUTPUT_DATASET_NAME, split="train", token=HF_TOKEN)
358
+
359
+ # Rebuild annotations dictionary from dataset
360
+ for item in dataset:
361
+ user_id = item["user_id"]
362
+ if user_id not in self.annotations:
363
+ self.annotations[user_id] = []
364
+
365
+ # Add to user's annotations
366
+ self.annotations[user_id].append({
367
+ "user_id": user_id,
368
+ "sample_id": item["sample_id"],
369
+ "choice": item["choice"],
370
+ "model_a": item.get("model_a", ""),
371
+ "model_b": item.get("model_b", ""),
372
+ "timestamp": item["timestamp"]
373
+ })
374
+
375
+ # Update user state
376
+ if user_id not in self.user_states:
377
+ self.user_states[user_id] = {
378
+ "current_index": 0,
379
+ "annotations": []
380
+ }
381
+ if item["sample_id"] not in self.user_states[user_id]["annotations"]:
382
+ self.user_states[user_id]["annotations"].append(item["sample_id"])
383
+
384
+ print(f"Loaded {len(dataset)} existing annotations")
385
+
386
+ except Exception as e:
387
+ print(f"Error loading existing annotations: {e}")
388
+ print("Starting with empty annotations")
389
+
390
  def get_user_seed(self, user_id: str) -> int:
391
  """Generate consistent seed for user"""
392
+ return int(hashlib.md5(user_id.encode()).hexdigest(), 16) % 10000
393
+
394
  def get_user_samples(self, user_id: str) -> List[Dict]:
395
  """Get shuffled samples for user based on their ID"""
396
  seed = self.get_user_seed(user_id)
397
+ samples = DATASET_SAMPLES.copy()
398
  random.Random(seed).shuffle(samples)
 
 
 
 
399
  return samples
400
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
401
  def get_next_sample(self, user_id: str) -> Tuple[Dict, int, int]:
402
  """Get next unannotated sample for user"""
403
  if user_id not in self.user_states:
 
409
  samples = self.get_user_samples(user_id)
410
  state = self.user_states[user_id]
411
 
412
+ # Count already annotated
413
+ annotated_count = len(state["annotations"])
414
+
415
  # Find next unannotated sample
416
+ for i, sample in enumerate(samples):
 
417
  if not self.is_annotated(user_id, sample["id"]):
418
+ return sample, annotated_count + 1, len(samples)
 
419
 
420
  # All samples annotated
421
  return None, len(samples), len(samples)
 
425
  if user_id not in self.annotations:
426
  return False
427
  return any(ann["sample_id"] == sample_id for ann in self.annotations[user_id])
428
+
429
+ def save_annotation(self, user_id: str, sample_id: str, choice: str,
430
+ sample_data: Dict = None):
431
+ """Save user's annotation locally and to HuggingFace"""
432
+ with self.lock:
433
+ if user_id not in self.annotations:
434
+ self.annotations[user_id] = []
435
+
436
+ annotation = {
437
+ "user_id": user_id,
438
+ "sample_id": sample_id,
439
+ "choice": choice,
440
+ "timestamp": datetime.now().isoformat()
441
+ }
442
+
443
+ # Add sample data if provided
444
+ if sample_data:
445
+ annotation.update({
446
+ "original_id": sample_data.get("original_id", ""),
447
+ "model_a": sample_data.get("model_a", ""),
448
+ "model_b": sample_data.get("model_b", ""),
449
+ "prompt": sample_data.get("prompt", ""),
450
+ "response_a": sample_data.get("response_a", ""),
451
+ "response_b": sample_data.get("response_b", ""),
452
+ "dataset": sample_data.get("dataset", "")
453
+ })
454
+
455
+ self.annotations[user_id].append(annotation)
456
+
457
+ # Update user state
458
+ if user_id in self.user_states:
459
+ if sample_id not in self.user_states[user_id]["annotations"]:
460
+ self.user_states[user_id]["annotations"].append(sample_id)
461
+ self.user_states[user_id]["current_index"] += 1
462
+
463
+ print(f"Saved annotation locally: {annotation['sample_id']} by {user_id}")
464
+
465
+ # Save to HuggingFace asynchronously
466
+ if HF_TOKEN:
467
+ thread = threading.Thread(
468
+ target=self.push_annotation_to_hub,
469
+ args=(annotation,)
470
+ )
471
+ thread.daemon = True
472
+ thread.start()
473
+
474
+ def push_annotation_to_hub(self, annotation: Dict):
475
+ """Push single annotation to HuggingFace dataset"""
476
+ try:
477
+ # Load current dataset
478
+ dataset = load_dataset(OUTPUT_DATASET_NAME, split="train", token=HF_TOKEN)
479
+
480
+ # Convert to dict
481
+ data_dict = dataset.to_dict()
482
+
483
+ # Ensure all keys exist
484
+ required_keys = ["user_id", "sample_id", "original_id", "model_a",
485
+ "model_b", "choice", "prompt", "response_a",
486
+ "response_b", "dataset", "timestamp"]
487
+
488
+ for key in required_keys:
489
+ if key not in data_dict:
490
+ data_dict[key] = []
491
+ # Append new annotation data
492
+ data_dict[key].append(annotation.get(key, ""))
493
+
494
+ # Create new dataset and push
495
+ updated_dataset = Dataset.from_dict(data_dict)
496
+ updated_dataset.push_to_hub(
497
+ OUTPUT_DATASET_NAME,
498
+ token=HF_TOKEN,
499
+ private=OUTPUT_DATASET_PRIVATE
500
+ )
501
+
502
+ print(f"Successfully pushed annotation to hub: {annotation['sample_id']}")
503
+
504
+ except Exception as e:
505
+ print(f"Error pushing annotation to hub: {e}")
506
+ # Add to cache for batch upload later
507
+ self.annotation_cache.append(annotation)
508
+
509
+ def get_user_progress(self, user_id: str) -> Dict:
510
+ """Get user's annotation progress"""
511
+ if user_id not in self.user_states:
512
+ return {"completed": 0, "total": len(DATASET_SAMPLES)}
513
+
514
+ completed = len(self.user_states[user_id]["annotations"])
515
+ return {"completed": completed, "total": len(DATASET_SAMPLES)}
516
 
517
 
518
  # Initialize manager
 
555
  gr.update(value=sample["prompt"]), # prompt
556
  gr.update(value=sample["response_a"]), # response_a
557
  gr.update(value=sample["response_b"]), # response_b
558
+ gr.update(value=f"Progress: {current}/{total} | Comparing: {sample.get('model_a', 'A')} vs {sample.get('model_b', 'B')}") # progress
559
+ # gr.update(value=f"Progress: {current}/{total}") # progress
560
  )
561
 
562
  def annotate(choice: str, user_id: str) -> Tuple:
 
579
  "b_better": "B is more fluent",
580
  "equal": "Equally fluent"
581
  }
582
+
583
+ # Save with full sample data for HuggingFace dataset
584
  manager.save_annotation(
585
  user_id,
586
  sample["id"],
587
  choice_map[choice],
588
+ sample_data=sample # Pass the full sample data
 
589
  )
590
 
591
  # Get next sample
 
600
  gr.update(value="All annotations complete!", visible=True) # status
601
  )
602
 
603
+ # Show which models are being compared
604
+ model_info = f" | Comparing: {next_sample.get('model_a', 'A')} vs {next_sample.get('model_b', 'B')}"
605
+
606
  return (
607
  gr.update(value=next_sample["prompt"]), # prompt
608
  gr.update(value=next_sample["response_a"]), # response_a
609
  gr.update(value=next_sample["response_b"]), # response_b
610
+ gr.update(value=f"Progress: {current}/{total} | Comparing: {sample.get('model_a', 'A')} vs {sample.get('model_b', 'B')}"),
611
+ # gr.update(value=f"Progress: {current}/{total}{model_info}"), # progress
612
  gr.update(value="Annotation saved!", visible=True) # status
613
  )
614