# NOTE(review): removed Hugging Face Spaces page-scrape artifacts that preceded
# the source ("Spaces:" header and two "Runtime error" status lines).
"""Global variables used in the space."""
import json
import os

import gradio as gr
import jsonlines
from huggingface_hub import HfApi

from src.constants import DATASET_NAME, HF_TOKEN, ASSETS_FOLDER, CONCEPTS

# Module-level state, populated by setup() before first use.
hf_api: HfApi        # authenticated Hub client
all_metadata: dict   # split name ("train"/"test") -> list of metadata rows
all_votes: dict      # sample id -> {username -> {concept -> bool}}
def setup():
    """Download the dataset snapshot and initialise the module-level state.

    Fills in ``hf_api``, ``all_metadata`` (one row list per split) and
    ``all_votes`` (one dict per vote file found under ``votes/``).
    """
    global hf_api
    global all_metadata
    global all_votes

    hf_api = HfApi(token=HF_TOKEN)
    local_root = f"{ASSETS_FOLDER}/{DATASET_NAME}"
    hf_api.snapshot_download(
        local_dir=local_root,
        repo_id=DATASET_NAME,
        repo_type="dataset",
    )

    # Load the per-split metadata rows from their jsonl files.
    all_metadata = {}
    for split in ["train", "test"]:
        with jsonlines.open(f"{local_root}/data/{split}/metadata.jsonl") as reader:
            all_metadata[split] = [row for row in reader]

    # Each file under votes/ is "<sample_id>.json" holding that sample's votes.
    all_votes = {}
    for filename in os.listdir(f"{local_root}/votes"):
        sample_id = filename.split(".")[0]
        with open(f"{local_root}/votes/{filename}") as f:
            all_votes[sample_id] = json.load(f)
def get_metadata(split):
    """Re-download the metadata file for *split* and refresh ``all_metadata[split]``."""
    global all_metadata
    global hf_api
    hf_api.hf_hub_download(
        repo_id=DATASET_NAME,
        filename="metadata.jsonl",
        subfolder=f"data/{split}",
        repo_type="dataset",
        local_dir=f"{ASSETS_FOLDER}/{DATASET_NAME}",
    )
    metadata_path = f"{ASSETS_FOLDER}/{DATASET_NAME}/data/{split}/metadata.jsonl"
    with jsonlines.open(metadata_path) as reader:
        all_metadata[split] = list(reader)
def save_metadata(split):
    """Write ``all_metadata[split]`` back to disk and upload it to the Hub."""
    global all_metadata
    local_path = f"{ASSETS_FOLDER}/{DATASET_NAME}/data/{split}/metadata.jsonl"
    with jsonlines.open(local_path, mode="w") as writer:
        writer.write_all(all_metadata[split])
    hf_api.upload_file(
        path_or_fileobj=local_path,
        path_in_repo=f"data/{split}/metadata.jsonl",
        repo_id=DATASET_NAME,
        repo_type="dataset",
    )
def update_votes(
    username: str,
    current_image: str,
    voted_concepts: list,
):
    """Record *username*'s concept votes for one sample and refresh its labels.

    *current_image* has the form ``"<split>:<index>"``. The vote stores, for
    every known concept, whether it appears in *voted_concepts*; the sample's
    metadata row is then updated with the recomputed majority labels.
    """
    global all_metadata
    global all_votes
    split, index = current_image.split(":")
    index = int(index)
    sample_id = all_metadata[split][index]["id"]
    all_votes.setdefault(sample_id, {})[username] = {
        concept: concept in voted_concepts for concept in CONCEPTS
    }
    for concept, label in compute_concepts(all_votes[sample_id]).items():
        all_metadata[split][index][concept] = label
def compute_concepts(votes):
    """Aggregate per-user boolean votes into one majority decision per concept.

    Each True vote counts +1 and each False vote -1; unknown concepts in a
    user's vote dict are ignored. Returns True/False for a strict majority
    and None on a tie (including zero votes).
    """
    tally = {concept: 0 for concept in CONCEPTS}
    for user_vote in votes.values():
        for concept in CONCEPTS:
            if concept in user_vote:
                tally[concept] += 1 if user_vote[concept] else -1
    return {
        concept: (total > 0) if total != 0 else None
        for concept, total in tally.items()
    }
def save_current_work(
    username: str,
):
    """Merge *username*'s local votes with the latest Hub state and push.

    Other users may have voted since our snapshot, so we first re-download
    the vote and metadata files, overlay this user's in-memory votes on top,
    recompute every affected concept label, persist everything locally, and
    finally upload the result back to the dataset repo.
    """
    global all_metadata
    global all_votes
    global hf_api

    local_root = f"{ASSETS_FOLDER}/{DATASET_NAME}"
    hf_api.snapshot_download(
        local_dir=local_root,
        repo_id=DATASET_NAME,
        repo_type="dataset",
        allow_patterns=["*/metadata.jsonl", "votes/*"],
    )

    # Start from the freshly downloaded votes ...
    merged_votes = {}
    for filename in os.listdir(f"{local_root}/votes"):
        sample_id = filename.split(".")[0]
        with open(f"{local_root}/votes/{filename}") as f:
            merged_votes[sample_id] = json.load(f)
    # ... then overlay this user's in-memory votes (theirs win on conflict).
    for sample_id, sample_votes in all_votes.items():
        if username in sample_votes:
            merged_votes.setdefault(sample_id, {})[username] = sample_votes[username]
    for sample_id, sample_votes in merged_votes.items():
        with open(f"{local_root}/votes/{sample_id}.json", "w") as f:
            json.dump(sample_votes, f)
    all_votes = merged_votes

    # Recompute the concept columns of every voted-on metadata row.
    merged_metadata = {}
    for split in ["train", "test"]:
        rows = []
        with jsonlines.open(f"{local_root}/data/{split}/metadata.jsonl") as reader:
            for row in reader:
                sample_id = row["id"]
                if sample_id in all_votes:
                    row.update(compute_concepts(all_votes[sample_id]))
                rows.append(row)
        merged_metadata[split] = rows
        with jsonlines.open(f"{local_root}/data/{split}/metadata.jsonl", mode="w") as writer:
            writer.write_all(rows)
    all_metadata = merged_metadata

    hf_api.upload_folder(
        folder_path=local_root,
        repo_id=DATASET_NAME,
        repo_type="dataset",
        allow_patterns=["*/metadata.jsonl", "votes/*"],
    )
def get_votes(key):
    """Best-effort refresh of ``all_votes[key]`` from the Hub.

    If the vote file does not exist on the Hub yet (nobody has voted on this
    sample) or the download fails, the local state is left unchanged.
    """
    global all_votes
    global hf_api
    try:
        hf_api.hf_hub_download(
            repo_id=DATASET_NAME,
            filename=f"votes/{key}.json",
            repo_type="dataset",
            local_dir=f"{ASSETS_FOLDER}/{DATASET_NAME}",
        )
        with open(f"{ASSETS_FOLDER}/{DATASET_NAME}/votes/{key}.json") as f:
            all_votes[key] = json.load(f)
    except Exception:
        # Was a bare `except: pass`, which also swallowed SystemExit and
        # KeyboardInterrupt. Keep the deliberate best-effort behaviour but
        # only absorb ordinary errors (e.g. file missing on the Hub).
        pass
def save_votes(key):
    """Write ``all_votes[key]`` to its local JSON file and upload it to the Hub."""
    global all_votes
    global hf_api
    local_path = f"{ASSETS_FOLDER}/{DATASET_NAME}/votes/{key}.json"
    with open(local_path, "w") as f:
        json.dump(all_votes[key], f)
    hf_api.upload_file(
        path_or_fileobj=local_path,
        path_in_repo=f"votes/{key}.json",
        repo_id=DATASET_NAME,
        repo_type="dataset",
    )
# Initialise module state once; gr.NO_RELOAD skips re-running this heavy
# download during Gradio's auto-reload re-executions.
if gr.NO_RELOAD:
    setup()