"""Sync AI conference metadata from the ccfddl repository into conferences.yml."""

import re
from datetime import datetime
from typing import Dict, List, Any

import requests
import yaml

# Seconds to wait for GitHub before giving up; without a timeout a stalled
# connection would block the script forever.
REQUEST_TIMEOUT = 30

_MONTH_NAMES = (
    "January",
    "February",
    "March",
    "April",
    "May",
    "June",
    "July",
    "August",
    "September",
    "October",
    "November",
    "December",
)

# An alphabetic word, optionally followed by an abbreviation period ("Sept.").
_WORD_RE = re.compile(r"([A-Za-z]+)\.?")

# En dash and em dash are treated like a plain hyphen when splitting ranges.
_DASH_TABLE = str.maketrans({"\u2013": "-", "\u2014": "-"})

# Fields of an existing local entry that always win over freshly fetched data.
_PRESERVED_FIELDS = (
    "tags",
    "venue",
    "hindex",
    "submission_deadline",
    "timezone_submission",
    "rebuttal_period_start",
    "rebuttal_period_end",
    "final_decision_date",
    "review_release_date",
    "commitment_deadline",
    "start",
    "end",
    "note",
    "city",
    "country",
)


def fetch_conference_files() -> List[Dict[str, Any]]:
    """Fetch all conference YAML files from the ccfddl repository.

    Returns:
        The first document of every ``.yml`` file in the ``conference/AI``
        directory whose content is a non-empty list.

    Raises:
        requests.RequestException: If the directory listing or any file
            download fails, times out, or returns an HTTP error status.
    """
    # First get the directory listing from the GitHub API.
    api_url = "https://api.github.com/repos/ccfddl/ccf-deadlines/contents/conference/AI"
    listing = requests.get(api_url, timeout=REQUEST_TIMEOUT)
    # An error payload (e.g. rate limiting) is a JSON object, not a list of
    # files; fail with the HTTP error instead of iterating over its keys.
    listing.raise_for_status()

    conferences = []
    for entry in listing.json():
        if not entry["name"].endswith(".yml"):
            continue
        download = requests.get(entry["download_url"], timeout=REQUEST_TIMEOUT)
        # Otherwise an error page would be parsed as YAML and silently dropped.
        download.raise_for_status()
        conf_data = yaml.safe_load(download.text)
        # The data is a list with a single item.
        if isinstance(conf_data, list) and conf_data:
            conferences.append(conf_data[0])
    return conferences


def _expand_month_names(text: str) -> str:
    """Replace month abbreviations ("Feb", "Sept", "Dec.") by full month names."""

    def _expand(match):
        stem = match.group(1).lower()
        # Three letters identify every month uniquely; shorter words are not
        # treated as months.
        if len(stem) >= 3:
            for name in _MONTH_NAMES:
                if name.lower().startswith(stem):
                    return name
        return match.group(0)

    # Whole words are replaced, so a name that is already complete is left
    # intact (a substring replace would turn "February" into "Februaryruary").
    return _WORD_RE.sub(_expand, text)


def parse_date_range(date_str: str, year: str) -> tuple[str, str]:
    """Parse a conference date string into ISO start and end dates.

    Supported shapes include "May 19, 2025", "Sept 5-7, 2024",
    "Dec 10 - 15, 2025" and "April 29-May 4, 2025". Month names may be
    abbreviated or written in full, and an en/em dash may be used instead
    of a hyphen.

    Args:
        date_str: Human-readable date or date range.
        year: Year applied to both ends of the range.

    Returns:
        A ``(start, end)`` tuple of ``YYYY-MM-DD`` strings; both are equal
        for a single date.

    Raises:
        ValueError: If the string cannot be interpreted as a date range.
    """
    # Remove the year wherever it trails a date in the string.
    stripped = str(date_str).replace(f", {year}", "")

    try:
        text = stripped.translate(_DASH_TABLE)

        # Split into start and end dates; more than one separator makes the
        # unpacking fail with ValueError, which is reported below.
        if " - " in text:
            start, end = text.split(" - ")
        elif "-" in text:
            start, end = text.split("-")
        else:
            # Single date format like "May 19".
            start = end = text

        # Expand abbreviations and collapse any extra spaces.
        start = " ".join(_expand_month_names(start).split())
        end = " ".join(_expand_month_names(end).split())

        # "May 19-23": the end is just a day number, so reuse the start's month.
        if not any(name in end for name in _MONTH_NAMES):
            start_parts = start.split()
            if start_parts:
                end = f"{start_parts[0]} {end}"

        start_date = datetime.strptime(f"{start}, {year}", "%B %d, %Y")
        end_date = datetime.strptime(f"{end}, {year}", "%B %d, %Y")
    except ValueError as exc:
        raise ValueError(f"Could not parse date: {stripped} ({exc})") from exc

    return start_date.strftime("%Y-%m-%d"), end_date.strftime("%Y-%m-%d")


def transform_conference_data(
    conferences: List[Dict[str, Any]],
) -> List[Dict[str, Any]]:
    """Transform ccfddl format to our format.

    For every conference, the first instance in ``confs`` whose year is the
    current year or later is converted; conferences without such an instance
    are dropped.

    Args:
        conferences: Conference records as returned by
            ``fetch_conference_files``.

    Returns:
        One dictionary per retained conference, in input order.
    """
    transformed = []
    current_year = datetime.now().year

    for conf in conferences:
        # Get the first current or upcoming conference instance.
        recent_conf = next(
            (
                instance
                for instance in conf.get("confs") or []
                if instance["year"] >= current_year
            ),
            None,
        )
        if not recent_conf:
            continue

        # A missing, null or empty timeline is treated as "no deadlines".
        timeline = (recent_conf.get("timeline") or [{}])[0]

        # Key order matters: it is the order written to the YAML file.
        transformed_conf = {
            "title": conf.get("title", ""),
            "year": recent_conf["year"],
            "id": recent_conf["id"],
            "full_name": conf.get("description", ""),
            "link": recent_conf.get("link", ""),
            "deadline": timeline.get("deadline", ""),
            "timezone": recent_conf.get("timezone", ""),
            "date": recent_conf.get("date", ""),
            "tags": [],  # We'll need to maintain a mapping for tags
        }

        # Handle city and country fields instead of place.
        place = recent_conf.get("place", "")
        if place:
            if "," in place:
                city, country = place.split(",", 1)
                transformed_conf["city"] = city.strip()
                transformed_conf["country"] = country.strip()
            else:
                # If we can't parse, just set the country.
                transformed_conf["country"] = place.strip()

        # Add optional fields.
        if "abstract_deadline" in timeline:
            transformed_conf["abstract_deadline"] = timeline["abstract_deadline"]

        # Parse date range for start/end; an unparseable date is only a warning.
        if transformed_conf["date"]:
            try:
                start_date, end_date = parse_date_range(
                    transformed_conf["date"], str(transformed_conf["year"])
                )
            except ValueError as e:
                print(
                    f"Warning: Could not parse date for {transformed_conf['title']}: {e}"
                )
            else:
                transformed_conf["start"] = start_date
                transformed_conf["end"] = end_date

        # Add rankings as separate field.
        if "rank" in conf:
            rankings = [
                f"{rank_type.upper()}: {rank_value}"
                for rank_type, rank_value in conf["rank"].items()
            ]
            if rankings:
                transformed_conf["rankings"] = ", ".join(rankings)

        transformed.append(transformed_conf)

    return transformed


def _merge_conferences(
    current_conferences: List[Dict[str, Any]],
    transformed_conferences: List[Dict[str, Any]],
) -> List[Dict[str, Any]]:
    """Merge fetched conferences into the existing ones and sort by deadline."""
    # Index current conferences by ID.
    current_conf_dict = {conf["id"]: conf for conf in current_conferences}

    # Title+year combinations already present, used to detect duplicates.
    existing_conf_keys = {(conf["title"], conf["year"]) for conf in current_conferences}

    for new_conf in transformed_conferences:
        conf_key = (new_conf["title"], new_conf["year"])
        conf_id = new_conf["id"]

        # Same title and year but a different ID: keep the entry we already have.
        if conf_key in existing_conf_keys and conf_id not in current_conf_dict:
            print(
                f"Skipping duplicate conference: {new_conf['title']} {new_conf['year']} (ID: {new_conf['id']})"
            )
            continue

        if conf_id in current_conf_dict:
            # Update existing conference while preserving curated fields.
            curr_conf = current_conf_dict[conf_id]
            for field in _PRESERVED_FIELDS:
                if field in curr_conf:
                    new_conf[field] = curr_conf[field]
            # Parsed start/end survive automatically when the current entry
            # has none, because only fields present in it are copied over.
            if "rankings" in curr_conf:
                new_conf["rankings"] = curr_conf["rankings"]
        else:
            # New conference: remember its key for later duplicate checks.
            existing_conf_keys.add(conf_key)

        current_conf_dict[conf_id] = new_conf

    # Compare deadlines as text so that string values and timestamps parsed by
    # YAML can be ordered together.
    return sorted(
        current_conf_dict.values(), key=lambda x: str(x.get("deadline", "9999"))
    )


def _dump_conferences(conferences: List[Dict[str, Any]]) -> str:
    """Serialise conferences as one-item YAML lists separated by blank lines."""
    chunks = [
        yaml.dump(
            [conf],
            allow_unicode=True,
            sort_keys=False,
            default_flow_style=False,
            explicit_start=False,
            explicit_end=False,
            width=float("inf"),
            indent=2,
            default_style=None,
        ).rstrip()  # Remove trailing whitespace
        for conf in conferences
    ]
    # Two newlines between conferences, one final newline.
    return "\n\n".join(chunks) + "\n"


def main():
    """Update ``src/data/conferences.yml`` with data fetched from ccfddl.

    Any failure is printed and then re-raised.
    """
    try:
        # Load the current conferences.yml; an empty file counts as no entries.
        current_file = "src/data/conferences.yml"
        with open(current_file, "r", encoding="utf-8") as f:
            current_conferences = yaml.safe_load(f) or []

        # Fetch and transform new data.
        new_conferences = fetch_conference_files()
        if not new_conferences:
            print("Warning: No conferences fetched from ccfddl")
            return

        transformed_conferences = transform_conference_data(new_conferences)
        if not transformed_conferences:
            print("Warning: No conferences transformed")
            return

        # Update or add new conferences while preserving existing ones.
        all_conferences = _merge_conferences(
            current_conferences, transformed_conferences
        )

        # The dump may contain non-ASCII text (allow_unicode=True), so the
        # encoding is explicit instead of platform-dependent.
        with open(current_file, "w", encoding="utf-8") as f:
            f.write(_dump_conferences(all_conferences))

        print(f"Successfully updated {len(all_conferences)} conferences")

    except Exception as e:
        print(f"Error: {e}")
        raise


if __name__ == "__main__":
    main()