Spaces:
Sleeping
Sleeping
Ruff
Browse files- .github/scripts/update_conferences.py +135 -117
- .github/scripts/update_conferences_new.py +148 -130
- agents/agent.py +37 -22
- agents/modal_agent.py +27 -26
- pyproject.toml +1 -0
- uv.lock +28 -0
.github/scripts/update_conferences.py
CHANGED
|
@@ -6,21 +6,21 @@ from typing import Dict, List, Any
|
|
| 6 |
|
| 7 |
def fetch_conference_files() -> List[Dict[str, Any]]:
|
| 8 |
"""Fetch all conference YAML files from ccfddl repository."""
|
| 9 |
-
|
| 10 |
# First get the directory listing from GitHub API
|
| 11 |
api_url = "https://api.github.com/repos/ccfddl/ccf-deadlines/contents/conference/AI"
|
| 12 |
response = requests.get(api_url)
|
| 13 |
files = response.json()
|
| 14 |
-
|
| 15 |
conferences = []
|
| 16 |
for file in files:
|
| 17 |
-
if file[
|
| 18 |
-
yaml_content = requests.get(file[
|
| 19 |
conf_data = yaml.safe_load(yaml_content)
|
| 20 |
# The data is a list with a single item
|
| 21 |
if isinstance(conf_data, list) and len(conf_data) > 0:
|
| 22 |
conferences.append(conf_data[0])
|
| 23 |
-
|
| 24 |
return conferences
|
| 25 |
|
| 26 |
|
|
@@ -28,37 +28,37 @@ def parse_date_range(date_str: str, year: str) -> tuple[str, str]:
|
|
| 28 |
"""Parse various date formats and return start and end dates."""
|
| 29 |
# Remove the year if it appears at the end of the string
|
| 30 |
date_str = date_str.replace(f", {year}", "")
|
| 31 |
-
|
| 32 |
# Handle various date formats
|
| 33 |
try:
|
| 34 |
# Split into start and end dates
|
| 35 |
-
if
|
| 36 |
-
start, end = date_str.split(
|
| 37 |
-
elif
|
| 38 |
-
start, end = date_str.split(
|
| 39 |
else:
|
| 40 |
# For single date format like "May 19, 2025"
|
| 41 |
start = end = date_str
|
| 42 |
-
|
| 43 |
# Clean up month abbreviations
|
| 44 |
month_map = {
|
| 45 |
-
|
| 46 |
-
|
| 47 |
-
|
| 48 |
-
|
| 49 |
-
|
| 50 |
-
|
| 51 |
-
|
| 52 |
-
|
| 53 |
-
|
| 54 |
-
|
| 55 |
-
|
| 56 |
-
|
| 57 |
}
|
| 58 |
-
|
| 59 |
# Create a set of all month names (full and abbreviated)
|
| 60 |
all_months = set(month_map.keys()) | set(month_map.values())
|
| 61 |
-
|
| 62 |
# Handle cases like "April 29-May 4"
|
| 63 |
has_month = any(month in end for month in all_months)
|
| 64 |
if not has_month:
|
|
@@ -66,178 +66,196 @@ def parse_date_range(date_str: str, year: str) -> tuple[str, str]:
|
|
| 66 |
start_parts = start.split()
|
| 67 |
if len(start_parts) >= 1:
|
| 68 |
end = f"{start_parts[0]} {end.strip()}"
|
| 69 |
-
|
| 70 |
# Replace month abbreviations
|
| 71 |
for abbr, full in month_map.items():
|
| 72 |
start = start.replace(abbr, full)
|
| 73 |
end = end.replace(abbr, full)
|
| 74 |
-
|
| 75 |
# Clean up any extra spaces
|
| 76 |
-
start =
|
| 77 |
-
end =
|
| 78 |
-
|
| 79 |
# Parse start date
|
| 80 |
start_date = datetime.strptime(f"{start}, {year}", "%B %d, %Y")
|
| 81 |
-
|
| 82 |
# Parse end date
|
| 83 |
end_date = datetime.strptime(f"{end}, {year}", "%B %d, %Y")
|
| 84 |
-
|
| 85 |
-
return start_date.strftime(
|
| 86 |
-
|
| 87 |
except Exception as e:
|
| 88 |
raise ValueError(f"Could not parse date: {date_str} ({e})")
|
| 89 |
|
| 90 |
|
| 91 |
-
def transform_conference_data(
|
|
|
|
|
|
|
| 92 |
"""Transform ccfddl format to our format."""
|
| 93 |
transformed = []
|
| 94 |
current_year = datetime.now().year
|
| 95 |
-
|
| 96 |
for conf in conferences:
|
| 97 |
# Get the most recent or upcoming conference instance
|
| 98 |
recent_conf = None
|
| 99 |
-
if
|
| 100 |
-
for instance in conf[
|
| 101 |
-
if instance[
|
| 102 |
recent_conf = instance
|
| 103 |
break
|
| 104 |
-
|
| 105 |
if not recent_conf:
|
| 106 |
continue
|
| 107 |
-
|
| 108 |
# Transform to our format
|
| 109 |
transformed_conf = {
|
| 110 |
-
|
| 111 |
-
|
| 112 |
-
|
| 113 |
-
|
| 114 |
-
|
| 115 |
-
|
| 116 |
-
|
| 117 |
-
|
| 118 |
-
|
| 119 |
}
|
| 120 |
-
|
| 121 |
# Handle city and country fields instead of place
|
| 122 |
-
place = recent_conf.get(
|
| 123 |
if place:
|
| 124 |
# Try to parse the place into city and country if it contains a comma
|
| 125 |
-
if
|
| 126 |
-
city, country = place.split(
|
| 127 |
-
transformed_conf[
|
| 128 |
-
transformed_conf[
|
| 129 |
else:
|
| 130 |
# If we can't parse, just set the country
|
| 131 |
-
transformed_conf[
|
| 132 |
-
|
| 133 |
# Add optional fields
|
| 134 |
-
timeline = recent_conf.get(
|
| 135 |
-
if
|
| 136 |
-
transformed_conf[
|
| 137 |
-
|
| 138 |
# Parse date range for start/end
|
| 139 |
try:
|
| 140 |
-
if transformed_conf[
|
| 141 |
start_date, end_date = parse_date_range(
|
| 142 |
-
transformed_conf[
|
| 143 |
-
str(transformed_conf['year'])
|
| 144 |
)
|
| 145 |
-
transformed_conf[
|
| 146 |
-
transformed_conf[
|
| 147 |
except Exception as e:
|
| 148 |
print(f"Warning: Could not parse date for {transformed_conf['title']}: {e}")
|
| 149 |
-
|
| 150 |
# Add rankings as separate field
|
| 151 |
-
if
|
| 152 |
rankings = []
|
| 153 |
-
for rank_type, rank_value in conf[
|
| 154 |
rankings.append(f"{rank_type.upper()}: {rank_value}")
|
| 155 |
if rankings:
|
| 156 |
-
transformed_conf[
|
| 157 |
-
|
| 158 |
transformed.append(transformed_conf)
|
| 159 |
-
|
| 160 |
return transformed
|
| 161 |
|
| 162 |
|
| 163 |
def main():
|
| 164 |
try:
|
| 165 |
# Fetch current conferences.yml
|
| 166 |
-
current_file =
|
| 167 |
-
with open(current_file,
|
| 168 |
current_conferences = yaml.safe_load(f)
|
| 169 |
-
|
| 170 |
# Fetch and transform new data
|
| 171 |
new_conferences = fetch_conference_files()
|
| 172 |
if not new_conferences:
|
| 173 |
print("Warning: No conferences fetched from ccfddl")
|
| 174 |
return
|
| 175 |
-
|
| 176 |
transformed_conferences = transform_conference_data(new_conferences)
|
| 177 |
if not transformed_conferences:
|
| 178 |
print("Warning: No conferences transformed")
|
| 179 |
return
|
| 180 |
-
|
| 181 |
# Create a dictionary of current conferences by ID
|
| 182 |
-
current_conf_dict = {conf[
|
| 183 |
-
|
| 184 |
# Create a set of existing conference title+year combinations to check for duplicates
|
| 185 |
-
existing_conf_keys = {
|
| 186 |
-
|
|
|
|
|
|
|
| 187 |
# Update or add new conferences while preserving existing ones
|
| 188 |
for new_conf in transformed_conferences:
|
| 189 |
# Check if this is a duplicate based on title and year
|
| 190 |
-
conf_key = (new_conf[
|
| 191 |
-
|
| 192 |
# Skip if we already have a conference with this title and year but different ID
|
| 193 |
-
if
|
| 194 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 195 |
continue
|
| 196 |
-
|
| 197 |
-
if new_conf[
|
| 198 |
# Update existing conference while preserving fields
|
| 199 |
-
curr_conf = current_conf_dict[new_conf[
|
| 200 |
-
|
| 201 |
# Preserve existing fields
|
| 202 |
preserved_fields = [
|
| 203 |
-
|
| 204 |
-
|
| 205 |
-
|
| 206 |
-
|
| 207 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 208 |
]
|
| 209 |
for field in preserved_fields:
|
| 210 |
if field in curr_conf:
|
| 211 |
new_conf[field] = curr_conf[field]
|
| 212 |
-
|
| 213 |
# If start/end not in current conference but we parsed them, keep the parsed ones
|
| 214 |
-
if
|
| 215 |
-
new_conf[
|
| 216 |
-
if
|
| 217 |
-
new_conf[
|
| 218 |
-
|
| 219 |
# Preserve existing rankings if available
|
| 220 |
-
if
|
| 221 |
-
new_conf[
|
| 222 |
-
|
| 223 |
# Update the conference in the dictionary
|
| 224 |
-
current_conf_dict[new_conf[
|
| 225 |
else:
|
| 226 |
# Add new conference to the dictionary
|
| 227 |
-
current_conf_dict[new_conf[
|
| 228 |
# Add to our set of existing conference keys
|
| 229 |
existing_conf_keys.add(conf_key)
|
| 230 |
-
|
| 231 |
# Convert back to list and sort by deadline
|
| 232 |
all_conferences = list(current_conf_dict.values())
|
| 233 |
-
all_conferences.sort(key=lambda x: x.get(
|
| 234 |
-
|
| 235 |
# Write back to file with newlines between conferences
|
| 236 |
-
with open(current_file,
|
| 237 |
for i, conf in enumerate(all_conferences):
|
| 238 |
if i > 0:
|
| 239 |
-
f.write(
|
| 240 |
-
|
| 241 |
yaml_str = yaml.dump(
|
| 242 |
[conf],
|
| 243 |
allow_unicode=True,
|
|
@@ -250,16 +268,16 @@ def main():
|
|
| 250 |
default_style=None,
|
| 251 |
)
|
| 252 |
f.write(yaml_str.rstrip()) # Remove trailing whitespace
|
| 253 |
-
|
| 254 |
# Add final newline
|
| 255 |
-
f.write(
|
| 256 |
-
|
| 257 |
print(f"Successfully updated {len(all_conferences)} conferences")
|
| 258 |
-
|
| 259 |
except Exception as e:
|
| 260 |
print(f"Error: {e}")
|
| 261 |
raise
|
| 262 |
|
| 263 |
|
| 264 |
if __name__ == "__main__":
|
| 265 |
-
main()
|
|
|
|
| 6 |
|
| 7 |
def fetch_conference_files() -> List[Dict[str, Any]]:
    """Fetch all conference YAML files from the ccfddl repository.

    Lists the ``conference/AI`` directory through the GitHub contents API,
    downloads every ``.yml`` file found there and parses it with
    ``yaml.safe_load``.

    Returns:
        The first item of every file whose parsed content is a non-empty
        list (each upstream file is expected to hold a one-item list).

    Raises:
        requests.HTTPError: If the directory listing or a file download
            answers with an error status (for example API rate limiting).
        ValueError: If the directory listing is not a JSON array.
    """
    api_url = "https://api.github.com/repos/ccfddl/ccf-deadlines/contents/conference/AI"
    # requests waits forever by default; bound every call so a stalled
    # connection cannot hang the scheduled job.
    timeout_seconds = 30

    # First get the directory listing from GitHub API
    response = requests.get(api_url, timeout=timeout_seconds)
    response.raise_for_status()
    files = response.json()
    if not isinstance(files, list):
        # Error payloads are JSON objects; iterating one would yield its keys
        # and fail later with an unhelpful TypeError.
        raise ValueError(
            f"Unexpected directory listing from {api_url}: "
            f"expected a list, got {type(files).__name__}"
        )

    conferences = []
    for file in files:
        download_url = file.get("download_url")
        # Sub-directories have a null download_url; skip them and non-YAML files.
        if not download_url or not file.get("name", "").endswith(".yml"):
            continue

        file_response = requests.get(download_url, timeout=timeout_seconds)
        file_response.raise_for_status()
        conf_data = yaml.safe_load(file_response.text)

        # The data is a list with a single item
        if isinstance(conf_data, list) and conf_data:
            conferences.append(conf_data[0])

    return conferences
|
| 25 |
|
| 26 |
|
|
|
|
| 28 |
"""Parse various date formats and return start and end dates."""
|
| 29 |
# Remove the year if it appears at the end of the string
|
| 30 |
date_str = date_str.replace(f", {year}", "")
|
| 31 |
+
|
| 32 |
# Handle various date formats
|
| 33 |
try:
|
| 34 |
# Split into start and end dates
|
| 35 |
+
if " - " in date_str:
|
| 36 |
+
start, end = date_str.split(" - ")
|
| 37 |
+
elif "-" in date_str:
|
| 38 |
+
start, end = date_str.split("-")
|
| 39 |
else:
|
| 40 |
# For single date format like "May 19, 2025"
|
| 41 |
start = end = date_str
|
| 42 |
+
|
| 43 |
# Clean up month abbreviations
|
| 44 |
month_map = {
|
| 45 |
+
"Sept": "September", # Handle Sept before Sep
|
| 46 |
+
"Jan": "January",
|
| 47 |
+
"Feb": "February",
|
| 48 |
+
"Mar": "March",
|
| 49 |
+
"Apr": "April",
|
| 50 |
+
"Jun": "June",
|
| 51 |
+
"Jul": "July",
|
| 52 |
+
"Aug": "August",
|
| 53 |
+
"Sep": "September",
|
| 54 |
+
"Oct": "October",
|
| 55 |
+
"Nov": "November",
|
| 56 |
+
"Dec": "December",
|
| 57 |
}
|
| 58 |
+
|
| 59 |
# Create a set of all month names (full and abbreviated)
|
| 60 |
all_months = set(month_map.keys()) | set(month_map.values())
|
| 61 |
+
|
| 62 |
# Handle cases like "April 29-May 4"
|
| 63 |
has_month = any(month in end for month in all_months)
|
| 64 |
if not has_month:
|
|
|
|
| 66 |
start_parts = start.split()
|
| 67 |
if len(start_parts) >= 1:
|
| 68 |
end = f"{start_parts[0]} {end.strip()}"
|
| 69 |
+
|
| 70 |
# Replace month abbreviations
|
| 71 |
for abbr, full in month_map.items():
|
| 72 |
start = start.replace(abbr, full)
|
| 73 |
end = end.replace(abbr, full)
|
| 74 |
+
|
| 75 |
# Clean up any extra spaces
|
| 76 |
+
start = " ".join(start.split())
|
| 77 |
+
end = " ".join(end.split())
|
| 78 |
+
|
| 79 |
# Parse start date
|
| 80 |
start_date = datetime.strptime(f"{start}, {year}", "%B %d, %Y")
|
| 81 |
+
|
| 82 |
# Parse end date
|
| 83 |
end_date = datetime.strptime(f"{end}, {year}", "%B %d, %Y")
|
| 84 |
+
|
| 85 |
+
return start_date.strftime("%Y-%m-%d"), end_date.strftime("%Y-%m-%d")
|
| 86 |
+
|
| 87 |
except Exception as e:
|
| 88 |
raise ValueError(f"Could not parse date: {date_str} ({e})")
|
| 89 |
|
| 90 |
|
| 91 |
+
def transform_conference_data(
    conferences: List[Dict[str, Any]],
) -> List[Dict[str, Any]]:
    """Transform ccfddl format to our format.

    For every upstream conference the first entry of ``confs`` whose ``year``
    is the current year or later is selected and flattened into one record.
    Conferences without such an entry are skipped.

    Args:
        conferences: Parsed ccfddl conference dictionaries.

    Returns:
        One dictionary per kept conference with the keys ``title``, ``year``,
        ``id``, ``full_name``, ``link``, ``deadline``, ``timezone``, ``date``
        and ``tags``, plus ``city``/``country``, ``abstract_deadline``,
        ``start``/``end`` and ``rankings`` when the source data provides them.

    Raises:
        KeyError: If a ``confs`` entry lacks ``year``, or the selected entry
            lacks ``id``.
    """
    transformed = []
    current_year = datetime.now().year

    for conf in conferences:
        # Get the most recent or upcoming conference instance
        recent_conf = None
        for instance in conf.get("confs") or []:
            if instance["year"] >= current_year:
                recent_conf = instance
                break

        if not recent_conf:
            continue

        # A missing, null or empty timeline all collapse to "no timeline data";
        # indexing [0] directly would raise on an empty list.
        timeline_entries = recent_conf.get("timeline") or [{}]
        timeline = timeline_entries[0]

        # Transform to our format
        transformed_conf = {
            "title": conf.get("title", ""),
            "year": recent_conf["year"],
            "id": recent_conf["id"],
            "full_name": conf.get("description", ""),
            "link": recent_conf.get("link", ""),
            "deadline": timeline.get("deadline", ""),
            "timezone": recent_conf.get("timezone", ""),
            "date": recent_conf.get("date", ""),
            "tags": [],  # We'll need to maintain a mapping for tags
        }

        # Handle city and country fields instead of place
        place = recent_conf.get("place", "")
        if place:
            if "," in place:
                # Split on the first comma only: "City, State, Country"
                # keeps everything after the city as the country part.
                city, country = place.split(",", 1)
                transformed_conf["city"] = city.strip()
                transformed_conf["country"] = country.strip()
            else:
                # If we can't parse, just set the country
                transformed_conf["country"] = place.strip()

        # Add optional fields
        if "abstract_deadline" in timeline:
            transformed_conf["abstract_deadline"] = timeline["abstract_deadline"]

        # Parse date range for start/end. Deliberately best-effort: a date
        # that cannot be parsed only produces a warning and the record is
        # kept without start/end.
        try:
            if transformed_conf["date"]:
                start_date, end_date = parse_date_range(
                    transformed_conf["date"], str(transformed_conf["year"])
                )
                transformed_conf["start"] = start_date
                transformed_conf["end"] = end_date
        except Exception as e:
            print(f"Warning: Could not parse date for {transformed_conf['title']}: {e}")

        # Add rankings as separate field
        rank = conf.get("rank") or {}
        rankings = [
            f"{rank_type.upper()}: {rank_value}"
            for rank_type, rank_value in rank.items()
        ]
        if rankings:
            transformed_conf["rankings"] = ", ".join(rankings)

        transformed.append(transformed_conf)

    return transformed
|
| 162 |
|
| 163 |
|
| 164 |
def main():
|
| 165 |
try:
|
| 166 |
# Fetch current conferences.yml
|
| 167 |
+
current_file = "src/data/conferences.yml"
|
| 168 |
+
with open(current_file, "r") as f:
|
| 169 |
current_conferences = yaml.safe_load(f)
|
| 170 |
+
|
| 171 |
# Fetch and transform new data
|
| 172 |
new_conferences = fetch_conference_files()
|
| 173 |
if not new_conferences:
|
| 174 |
print("Warning: No conferences fetched from ccfddl")
|
| 175 |
return
|
| 176 |
+
|
| 177 |
transformed_conferences = transform_conference_data(new_conferences)
|
| 178 |
if not transformed_conferences:
|
| 179 |
print("Warning: No conferences transformed")
|
| 180 |
return
|
| 181 |
+
|
| 182 |
# Create a dictionary of current conferences by ID
|
| 183 |
+
current_conf_dict = {conf["id"]: conf for conf in current_conferences}
|
| 184 |
+
|
| 185 |
# Create a set of existing conference title+year combinations to check for duplicates
|
| 186 |
+
existing_conf_keys = {
|
| 187 |
+
(conf["title"], conf["year"]) for conf in current_conferences
|
| 188 |
+
}
|
| 189 |
+
|
| 190 |
# Update or add new conferences while preserving existing ones
|
| 191 |
for new_conf in transformed_conferences:
|
| 192 |
# Check if this is a duplicate based on title and year
|
| 193 |
+
conf_key = (new_conf["title"], new_conf["year"])
|
| 194 |
+
|
| 195 |
# Skip if we already have a conference with this title and year but different ID
|
| 196 |
+
if (
|
| 197 |
+
conf_key in existing_conf_keys
|
| 198 |
+
and new_conf["id"] not in current_conf_dict
|
| 199 |
+
):
|
| 200 |
+
print(
|
| 201 |
+
f"Skipping duplicate conference: {new_conf['title']} {new_conf['year']} (ID: {new_conf['id']})"
|
| 202 |
+
)
|
| 203 |
continue
|
| 204 |
+
|
| 205 |
+
if new_conf["id"] in current_conf_dict:
|
| 206 |
# Update existing conference while preserving fields
|
| 207 |
+
curr_conf = current_conf_dict[new_conf["id"]]
|
| 208 |
+
|
| 209 |
# Preserve existing fields
|
| 210 |
preserved_fields = [
|
| 211 |
+
"tags",
|
| 212 |
+
"venue",
|
| 213 |
+
"hindex",
|
| 214 |
+
"submission_deadline",
|
| 215 |
+
"timezone_submission",
|
| 216 |
+
"rebuttal_period_start",
|
| 217 |
+
"rebuttal_period_end",
|
| 218 |
+
"final_decision_date",
|
| 219 |
+
"review_release_date",
|
| 220 |
+
"commitment_deadline",
|
| 221 |
+
"start",
|
| 222 |
+
"end",
|
| 223 |
+
"note",
|
| 224 |
+
"city",
|
| 225 |
+
"country", # Added city and country to preserved fields
|
| 226 |
]
|
| 227 |
for field in preserved_fields:
|
| 228 |
if field in curr_conf:
|
| 229 |
new_conf[field] = curr_conf[field]
|
| 230 |
+
|
| 231 |
# If start/end not in current conference but we parsed them, keep the parsed ones
|
| 232 |
+
if "start" not in curr_conf and "start" in new_conf:
|
| 233 |
+
new_conf["start"] = new_conf["start"]
|
| 234 |
+
if "end" not in curr_conf and "end" in new_conf:
|
| 235 |
+
new_conf["end"] = new_conf["end"]
|
| 236 |
+
|
| 237 |
# Preserve existing rankings if available
|
| 238 |
+
if "rankings" in curr_conf:
|
| 239 |
+
new_conf["rankings"] = curr_conf["rankings"]
|
| 240 |
+
|
| 241 |
# Update the conference in the dictionary
|
| 242 |
+
current_conf_dict[new_conf["id"]] = new_conf
|
| 243 |
else:
|
| 244 |
# Add new conference to the dictionary
|
| 245 |
+
current_conf_dict[new_conf["id"]] = new_conf
|
| 246 |
# Add to our set of existing conference keys
|
| 247 |
existing_conf_keys.add(conf_key)
|
| 248 |
+
|
| 249 |
# Convert back to list and sort by deadline
|
| 250 |
all_conferences = list(current_conf_dict.values())
|
| 251 |
+
all_conferences.sort(key=lambda x: x.get("deadline", "9999"))
|
| 252 |
+
|
| 253 |
# Write back to file with newlines between conferences
|
| 254 |
+
with open(current_file, "w") as f:
|
| 255 |
for i, conf in enumerate(all_conferences):
|
| 256 |
if i > 0:
|
| 257 |
+
f.write("\n\n") # Add two newlines between conferences
|
| 258 |
+
|
| 259 |
yaml_str = yaml.dump(
|
| 260 |
[conf],
|
| 261 |
allow_unicode=True,
|
|
|
|
| 268 |
default_style=None,
|
| 269 |
)
|
| 270 |
f.write(yaml_str.rstrip()) # Remove trailing whitespace
|
| 271 |
+
|
| 272 |
# Add final newline
|
| 273 |
+
f.write("\n")
|
| 274 |
+
|
| 275 |
print(f"Successfully updated {len(all_conferences)} conferences")
|
| 276 |
+
|
| 277 |
except Exception as e:
|
| 278 |
print(f"Error: {e}")
|
| 279 |
raise
|
| 280 |
|
| 281 |
|
| 282 |
if __name__ == "__main__":
|
| 283 |
+
main()
|
.github/scripts/update_conferences_new.py
CHANGED
|
@@ -8,21 +8,21 @@ from typing import Dict, List, Any
|
|
| 8 |
|
| 9 |
def fetch_conference_files() -> List[Dict[str, Any]]:
|
| 10 |
"""Fetch all conference YAML files from ccfddl repository."""
|
| 11 |
-
|
| 12 |
# First get the directory listing from GitHub API
|
| 13 |
api_url = "https://api.github.com/repos/ccfddl/ccf-deadlines/contents/conference/AI"
|
| 14 |
response = requests.get(api_url)
|
| 15 |
files = response.json()
|
| 16 |
-
|
| 17 |
conferences = []
|
| 18 |
for file in files:
|
| 19 |
-
if file[
|
| 20 |
-
yaml_content = requests.get(file[
|
| 21 |
conf_data = yaml.safe_load(yaml_content)
|
| 22 |
# The data is a list with a single item
|
| 23 |
if isinstance(conf_data, list) and len(conf_data) > 0:
|
| 24 |
conferences.append(conf_data[0])
|
| 25 |
-
|
| 26 |
return conferences
|
| 27 |
|
| 28 |
|
|
@@ -30,37 +30,37 @@ def parse_date_range(date_str: str, year: str) -> tuple[str, str]:
|
|
| 30 |
"""Parse various date formats and return start and end dates."""
|
| 31 |
# Remove the year if it appears at the end of the string
|
| 32 |
date_str = date_str.replace(f", {year}", "")
|
| 33 |
-
|
| 34 |
# Handle various date formats
|
| 35 |
try:
|
| 36 |
# Split into start and end dates
|
| 37 |
-
if
|
| 38 |
-
start, end = date_str.split(
|
| 39 |
-
elif
|
| 40 |
-
start, end = date_str.split(
|
| 41 |
else:
|
| 42 |
# For single date format like "May 19, 2025"
|
| 43 |
start = end = date_str
|
| 44 |
-
|
| 45 |
# Clean up month abbreviations
|
| 46 |
month_map = {
|
| 47 |
-
|
| 48 |
-
|
| 49 |
-
|
| 50 |
-
|
| 51 |
-
|
| 52 |
-
|
| 53 |
-
|
| 54 |
-
|
| 55 |
-
|
| 56 |
-
|
| 57 |
-
|
| 58 |
-
|
| 59 |
}
|
| 60 |
-
|
| 61 |
# Create a set of all month names (full and abbreviated)
|
| 62 |
all_months = set(month_map.keys()) | set(month_map.values())
|
| 63 |
-
|
| 64 |
# Handle cases like "April 29-May 4"
|
| 65 |
has_month = any(month in end for month in all_months)
|
| 66 |
if not has_month:
|
|
@@ -68,152 +68,153 @@ def parse_date_range(date_str: str, year: str) -> tuple[str, str]:
|
|
| 68 |
start_parts = start.split()
|
| 69 |
if len(start_parts) >= 1:
|
| 70 |
end = f"{start_parts[0]} {end.strip()}"
|
| 71 |
-
|
| 72 |
# Replace month abbreviations
|
| 73 |
for abbr, full in month_map.items():
|
| 74 |
start = start.replace(abbr, full)
|
| 75 |
end = end.replace(abbr, full)
|
| 76 |
-
|
| 77 |
# Clean up any extra spaces
|
| 78 |
-
start =
|
| 79 |
-
end =
|
| 80 |
-
|
| 81 |
# Parse start date
|
| 82 |
start_date = datetime.strptime(f"{start}, {year}", "%B %d, %Y")
|
| 83 |
-
|
| 84 |
# Parse end date
|
| 85 |
end_date = datetime.strptime(f"{end}, {year}", "%B %d, %Y")
|
| 86 |
-
|
| 87 |
-
return start_date.strftime(
|
| 88 |
-
|
| 89 |
except Exception as e:
|
| 90 |
raise ValueError(f"Could not parse date: {date_str} ({e})")
|
| 91 |
|
| 92 |
|
| 93 |
-
def transform_conference_data(
|
|
|
|
|
|
|
| 94 |
"""Transform ccfddl format to our format."""
|
| 95 |
transformed = []
|
| 96 |
current_year = datetime.now().year
|
| 97 |
-
|
| 98 |
for conf in conferences:
|
| 99 |
# Get the most recent or upcoming conference instance
|
| 100 |
recent_conf = None
|
| 101 |
-
if
|
| 102 |
-
for instance in conf[
|
| 103 |
-
if instance[
|
| 104 |
recent_conf = instance
|
| 105 |
break
|
| 106 |
-
|
| 107 |
if not recent_conf:
|
| 108 |
continue
|
| 109 |
-
|
| 110 |
# Transform to our format
|
| 111 |
transformed_conf = {
|
| 112 |
-
|
| 113 |
-
|
| 114 |
-
|
| 115 |
-
|
| 116 |
-
|
| 117 |
-
|
| 118 |
-
|
| 119 |
-
|
| 120 |
-
|
| 121 |
}
|
| 122 |
-
|
| 123 |
# Handle city and country fields instead of place
|
| 124 |
-
place = recent_conf.get(
|
| 125 |
if place:
|
| 126 |
# Try to parse the place into city and country if it contains a comma
|
| 127 |
-
if
|
| 128 |
-
city, country = place.split(
|
| 129 |
-
transformed_conf[
|
| 130 |
-
transformed_conf[
|
| 131 |
else:
|
| 132 |
# If we can't parse, just set the country
|
| 133 |
-
transformed_conf[
|
| 134 |
-
|
| 135 |
# Add optional fields
|
| 136 |
-
timeline = recent_conf.get(
|
| 137 |
-
if
|
| 138 |
-
transformed_conf[
|
| 139 |
-
|
| 140 |
# Parse date range for start/end
|
| 141 |
try:
|
| 142 |
-
if transformed_conf[
|
| 143 |
start_date, end_date = parse_date_range(
|
| 144 |
-
transformed_conf[
|
| 145 |
-
str(transformed_conf['year'])
|
| 146 |
)
|
| 147 |
-
transformed_conf[
|
| 148 |
-
transformed_conf[
|
| 149 |
except Exception as e:
|
| 150 |
print(f"Warning: Could not parse date for {transformed_conf['title']}: {e}")
|
| 151 |
-
|
| 152 |
# Add rankings as separate field
|
| 153 |
-
if
|
| 154 |
rankings = []
|
| 155 |
-
for rank_type, rank_value in conf[
|
| 156 |
rankings.append(f"{rank_type.upper()}: {rank_value}")
|
| 157 |
if rankings:
|
| 158 |
-
transformed_conf[
|
| 159 |
-
|
| 160 |
transformed.append(transformed_conf)
|
| 161 |
-
|
| 162 |
return transformed
|
| 163 |
|
| 164 |
|
| 165 |
def load_all_current_conferences() -> Dict[str, List[Dict[str, Any]]]:
|
| 166 |
"""Load all current conferences from individual files."""
|
| 167 |
-
conferences_dir =
|
| 168 |
conference_groups = {}
|
| 169 |
-
|
| 170 |
if not os.path.exists(conferences_dir):
|
| 171 |
return {}
|
| 172 |
-
|
| 173 |
for filename in os.listdir(conferences_dir):
|
| 174 |
-
if filename.endswith(
|
| 175 |
filepath = os.path.join(conferences_dir, filename)
|
| 176 |
-
with open(filepath,
|
| 177 |
conferences = yaml.safe_load(f)
|
| 178 |
if conferences:
|
| 179 |
# Extract conference title from the first entry
|
| 180 |
-
title = conferences[0][
|
| 181 |
conference_groups[title] = conferences
|
| 182 |
-
|
| 183 |
return conference_groups
|
| 184 |
|
| 185 |
|
| 186 |
def create_filename_from_title(title: str) -> str:
|
| 187 |
"""Create a filename-safe version of the conference title."""
|
| 188 |
-
filename = re.sub(r
|
| 189 |
-
filename = re.sub(r
|
| 190 |
-
filename = filename.replace(
|
| 191 |
-
filename = filename.strip(
|
| 192 |
return filename
|
| 193 |
|
| 194 |
|
| 195 |
def update_conference_loader():
|
| 196 |
"""Update the conference loader file with all current conferences."""
|
| 197 |
-
conferences_dir =
|
| 198 |
-
loader_path =
|
| 199 |
-
|
| 200 |
# Get all conference files
|
| 201 |
conference_files = []
|
| 202 |
if os.path.exists(conferences_dir):
|
| 203 |
for filename in sorted(os.listdir(conferences_dir)):
|
| 204 |
-
if filename.endswith(
|
| 205 |
conference_files.append(filename)
|
| 206 |
-
|
| 207 |
# Generate import statements
|
| 208 |
imports = []
|
| 209 |
variable_names = []
|
| 210 |
-
|
| 211 |
for filename in conference_files:
|
| 212 |
# Create variable name from filename
|
| 213 |
-
var_name = filename.replace(
|
| 214 |
variable_names.append(var_name)
|
| 215 |
imports.append(f"import {var_name} from '@/data/conferences/{filename}';")
|
| 216 |
-
|
| 217 |
# Generate the loader file content
|
| 218 |
loader_content = f"""import {{ Conference }} from '@/types/conference';
|
| 219 |
|
|
@@ -222,15 +223,15 @@ def update_conference_loader():
|
|
| 222 |
|
| 223 |
// Combine all conference data into a single array
|
| 224 |
const allConferencesData: Conference[] = [
|
| 225 |
-
{chr(10).join(f
|
| 226 |
];
|
| 227 |
|
| 228 |
export default allConferencesData;"""
|
| 229 |
-
|
| 230 |
# Write the loader file
|
| 231 |
-
with open(loader_path,
|
| 232 |
f.write(loader_content)
|
| 233 |
-
|
| 234 |
print(f"Updated conference loader with {len(conference_files)} conference files")
|
| 235 |
|
| 236 |
|
|
@@ -238,83 +239,100 @@ def main():
|
|
| 238 |
try:
|
| 239 |
# Load current conferences from individual files
|
| 240 |
current_conference_groups = load_all_current_conferences()
|
| 241 |
-
|
| 242 |
# Fetch and transform new data
|
| 243 |
new_conferences = fetch_conference_files()
|
| 244 |
if not new_conferences:
|
| 245 |
print("Warning: No conferences fetched from ccfddl")
|
| 246 |
return
|
| 247 |
-
|
| 248 |
transformed_conferences = transform_conference_data(new_conferences)
|
| 249 |
if not transformed_conferences:
|
| 250 |
print("Warning: No conferences transformed")
|
| 251 |
return
|
| 252 |
-
|
| 253 |
# Create conferences directory if it doesn't exist
|
| 254 |
-
conferences_dir =
|
| 255 |
os.makedirs(conferences_dir, exist_ok=True)
|
| 256 |
-
|
| 257 |
# Group new conferences by title
|
| 258 |
new_conference_groups = {}
|
| 259 |
for conf in transformed_conferences:
|
| 260 |
-
title = conf[
|
| 261 |
if title not in new_conference_groups:
|
| 262 |
new_conference_groups[title] = []
|
| 263 |
new_conference_groups[title].append(conf)
|
| 264 |
-
|
| 265 |
# Update each conference group
|
| 266 |
updated_count = 0
|
| 267 |
for title, new_confs in new_conference_groups.items():
|
| 268 |
-
filename = create_filename_from_title(title) +
|
| 269 |
filepath = os.path.join(conferences_dir, filename)
|
| 270 |
-
|
| 271 |
# Get current conferences for this title
|
| 272 |
current_confs = current_conference_groups.get(title, [])
|
| 273 |
-
current_conf_dict = {conf[
|
| 274 |
-
|
| 275 |
# Update or add new conferences
|
| 276 |
for new_conf in new_confs:
|
| 277 |
-
if new_conf[
|
| 278 |
# Update existing conference while preserving fields
|
| 279 |
-
curr_conf = current_conf_dict[new_conf[
|
| 280 |
-
|
| 281 |
# Preserve existing fields
|
| 282 |
preserved_fields = [
|
| 283 |
-
|
| 284 |
-
|
| 285 |
-
|
| 286 |
-
|
| 287 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 288 |
]
|
| 289 |
for field in preserved_fields:
|
| 290 |
if field in curr_conf:
|
| 291 |
new_conf[field] = curr_conf[field]
|
| 292 |
-
|
| 293 |
# Preserve existing rankings if available
|
| 294 |
-
if
|
| 295 |
-
new_conf[
|
| 296 |
-
|
| 297 |
-
current_conf_dict[new_conf[
|
| 298 |
else:
|
| 299 |
# Add new conference
|
| 300 |
-
current_conf_dict[new_conf[
|
| 301 |
-
|
| 302 |
# Convert back to list and sort by year
|
| 303 |
all_confs = list(current_conf_dict.values())
|
| 304 |
-
all_confs.sort(key=lambda x: x.get(
|
| 305 |
-
|
| 306 |
# Write to individual file
|
| 307 |
-
with open(filepath,
|
| 308 |
-
yaml.dump(
|
| 309 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 310 |
updated_count += 1
|
| 311 |
print(f"Updated {filename} with {len(all_confs)} entries")
|
| 312 |
-
|
| 313 |
# Update the conference loader
|
| 314 |
update_conference_loader()
|
| 315 |
-
|
| 316 |
print(f"Successfully updated {updated_count} conference files")
|
| 317 |
-
|
| 318 |
except Exception as e:
|
| 319 |
print(f"Error: {e}")
|
| 320 |
raise
|
|
|
|
| 8 |
|
| 9 |
def fetch_conference_files() -> List[Dict[str, Any]]:
    """Fetch all conference YAML files from the ccfddl repository.

    Lists the ``conference/AI`` directory through the GitHub contents API,
    downloads every ``.yml`` file found there and parses it with
    ``yaml.safe_load``.

    Returns:
        The first item of every file whose parsed content is a non-empty
        list (each upstream file is expected to hold a one-item list).

    Raises:
        requests.HTTPError: If the directory listing or a file download
            answers with an error status (for example API rate limiting).
        ValueError: If the directory listing is not a JSON array.
    """
    api_url = "https://api.github.com/repos/ccfddl/ccf-deadlines/contents/conference/AI"
    # requests waits forever by default; bound every call so a stalled
    # connection cannot hang the scheduled job.
    timeout_seconds = 30

    # First get the directory listing from GitHub API
    response = requests.get(api_url, timeout=timeout_seconds)
    response.raise_for_status()
    files = response.json()
    if not isinstance(files, list):
        # Error payloads are JSON objects; iterating one would yield its keys
        # and fail later with an unhelpful TypeError.
        raise ValueError(
            f"Unexpected directory listing from {api_url}: "
            f"expected a list, got {type(files).__name__}"
        )

    conferences = []
    for file in files:
        download_url = file.get("download_url")
        # Sub-directories have a null download_url; skip them and non-YAML files.
        if not download_url or not file.get("name", "").endswith(".yml"):
            continue

        file_response = requests.get(download_url, timeout=timeout_seconds)
        file_response.raise_for_status()
        conf_data = yaml.safe_load(file_response.text)

        # The data is a list with a single item
        if isinstance(conf_data, list) and conf_data:
            conferences.append(conf_data[0])

    return conferences
|
| 27 |
|
| 28 |
|
|
|
|
| 30 |
"""Parse various date formats and return start and end dates."""
|
| 31 |
# Remove the year if it appears at the end of the string
|
| 32 |
date_str = date_str.replace(f", {year}", "")
|
| 33 |
+
|
| 34 |
# Handle various date formats
|
| 35 |
try:
|
| 36 |
# Split into start and end dates
|
| 37 |
+
if " - " in date_str:
|
| 38 |
+
start, end = date_str.split(" - ")
|
| 39 |
+
elif "-" in date_str:
|
| 40 |
+
start, end = date_str.split("-")
|
| 41 |
else:
|
| 42 |
# For single date format like "May 19, 2025"
|
| 43 |
start = end = date_str
|
| 44 |
+
|
| 45 |
# Clean up month abbreviations
|
| 46 |
month_map = {
|
| 47 |
+
"Sept": "September", # Handle Sept before Sep
|
| 48 |
+
"Jan": "January",
|
| 49 |
+
"Feb": "February",
|
| 50 |
+
"Mar": "March",
|
| 51 |
+
"Apr": "April",
|
| 52 |
+
"Jun": "June",
|
| 53 |
+
"Jul": "July",
|
| 54 |
+
"Aug": "August",
|
| 55 |
+
"Sep": "September",
|
| 56 |
+
"Oct": "October",
|
| 57 |
+
"Nov": "November",
|
| 58 |
+
"Dec": "December",
|
| 59 |
}
|
| 60 |
+
|
| 61 |
# Create a set of all month names (full and abbreviated)
|
| 62 |
all_months = set(month_map.keys()) | set(month_map.values())
|
| 63 |
+
|
| 64 |
# Handle cases like "April 29-May 4"
|
| 65 |
has_month = any(month in end for month in all_months)
|
| 66 |
if not has_month:
|
|
|
|
| 68 |
start_parts = start.split()
|
| 69 |
if len(start_parts) >= 1:
|
| 70 |
end = f"{start_parts[0]} {end.strip()}"
|
| 71 |
+
|
| 72 |
# Replace month abbreviations
|
| 73 |
for abbr, full in month_map.items():
|
| 74 |
start = start.replace(abbr, full)
|
| 75 |
end = end.replace(abbr, full)
|
| 76 |
+
|
| 77 |
# Clean up any extra spaces
|
| 78 |
+
start = " ".join(start.split())
|
| 79 |
+
end = " ".join(end.split())
|
| 80 |
+
|
| 81 |
# Parse start date
|
| 82 |
start_date = datetime.strptime(f"{start}, {year}", "%B %d, %Y")
|
| 83 |
+
|
| 84 |
# Parse end date
|
| 85 |
end_date = datetime.strptime(f"{end}, {year}", "%B %d, %Y")
|
| 86 |
+
|
| 87 |
+
return start_date.strftime("%Y-%m-%d"), end_date.strftime("%Y-%m-%d")
|
| 88 |
+
|
| 89 |
except Exception as e:
|
| 90 |
raise ValueError(f"Could not parse date: {date_str} ({e})")
|
| 91 |
|
| 92 |
|
| 93 |
+
def transform_conference_data(
    conferences: List[Dict[str, Any]],
) -> List[Dict[str, Any]]:
    """Transform ccfddl conference records into this project's format.

    For each record, the first entry of ``confs`` whose ``year`` is the current
    year or later is selected; records without such an entry are skipped.

    Args:
        conferences: Parsed ccfddl records. Each is a dict that may carry
            ``title``, ``description``, ``rank`` and a ``confs`` list of
            per-year instances.

    Returns:
        One dict per selected instance with the keys ``title``, ``year``,
        ``id``, ``full_name``, ``link``, ``deadline``, ``timezone``, ``date``
        and ``tags`` (in that order), followed by whichever optional keys
        could be derived: ``city``/``country``, ``abstract_deadline``,
        ``start``/``end`` and ``rankings``.

    Raises:
        KeyError: If a ``confs`` entry lacks ``year``, or the selected entry
            lacks ``id``.
    """
    transformed = []
    current_year = datetime.now().year

    for conf in conferences:
        # Pick the first instance that is current or upcoming. ``or []`` also
        # covers a ``confs`` key that is present but null in the YAML.
        recent_conf = next(
            (
                instance
                for instance in conf.get("confs") or []
                if instance["year"] >= current_year
            ),
            None,
        )
        if not recent_conf:
            continue

        # ``timeline`` may be missing, null, or an empty list; all three fall
        # back to an empty entry instead of raising IndexError.
        timeline = (recent_conf.get("timeline") or [{}])[0] or {}

        # Key order matters: the YAML is later dumped with sort_keys=False.
        transformed_conf = {
            "title": conf.get("title", ""),
            "year": recent_conf["year"],
            "id": recent_conf["id"],
            "full_name": conf.get("description", ""),
            "link": recent_conf.get("link", ""),
            "deadline": timeline.get("deadline", ""),
            "timezone": recent_conf.get("timezone", ""),
            "date": recent_conf.get("date", ""),
            "tags": [],  # We'll need to maintain a mapping for tags
        }

        # Split "City, Country" on the first comma; without a comma the whole
        # string is stored as the country.
        place = recent_conf.get("place", "")
        if place:
            if "," in place:
                city, country = place.split(",", 1)
                transformed_conf["city"] = city.strip()
                transformed_conf["country"] = country.strip()
            else:
                transformed_conf["country"] = place.strip()

        if "abstract_deadline" in timeline:
            transformed_conf["abstract_deadline"] = timeline["abstract_deadline"]

        # Date parsing is deliberately best-effort: a malformed date only
        # produces a warning and the entry is kept without start/end.
        try:
            if transformed_conf["date"]:
                start_date, end_date = parse_date_range(
                    transformed_conf["date"], str(transformed_conf["year"])
                )
                transformed_conf["start"] = start_date
                transformed_conf["end"] = end_date
        except Exception as e:
            print(f"Warning: Could not parse date for {transformed_conf['title']}: {e}")

        # Flatten the rank mapping into one "TYPE: value" string; a null or
        # empty ``rank`` yields no ``rankings`` key.
        rankings = [
            f"{rank_type.upper()}: {rank_value}"
            for rank_type, rank_value in (conf.get("rank") or {}).items()
        ]
        if rankings:
            transformed_conf["rankings"] = ", ".join(rankings)

        transformed.append(transformed_conf)

    return transformed
|
| 164 |
|
| 165 |
|
| 166 |
def load_all_current_conferences(
    conferences_dir: str = "src/data/conferences",
) -> Dict[str, List[Dict[str, Any]]]:
    """Load the existing per-conference YAML files, grouped by title.

    Args:
        conferences_dir: Directory holding one ``.yml`` file per conference.
            Defaults to the project's data directory.

    Returns:
        A mapping from the ``title`` of the first entry in each non-empty
        ``.yml`` file to that file's full list of entries. Empty when the
        directory does not exist or contains no usable files.

    Raises:
        KeyError: If the first entry of a file has no ``title``.
    """
    conference_groups: Dict[str, List[Dict[str, Any]]] = {}

    if not os.path.isdir(conferences_dir):
        return conference_groups

    # Sorted so that, if two files share a title, the winner is deterministic.
    for filename in sorted(os.listdir(conferences_dir)):
        if not filename.endswith(".yml"):
            continue

        filepath = os.path.join(conferences_dir, filename)
        # Read explicitly as UTF-8 rather than the platform default: the data
        # files may contain non-ASCII text.
        with open(filepath, "r", encoding="utf-8") as f:
            conferences = yaml.safe_load(f)

        if conferences:
            # The conference title is taken from the first entry in the file.
            title = conferences[0]["title"]
            conference_groups[title] = conferences

    return conference_groups
|
| 185 |
|
| 186 |
|
| 187 |
def create_filename_from_title(title: str) -> str:
    """Derive a filename stem from a conference title.

    The title is lower-cased, stripped of every character other than ASCII
    letters, digits, whitespace, ``&``, parentheses and ``-``, then whitespace
    runs become single underscores, ``&`` becomes ``and``, and leading or
    trailing underscores are removed.
    """
    lowered = title.lower()
    allowed_only = re.sub(r"[^a-zA-Z0-9\s&()-]", "", lowered)
    underscored = re.sub(r"\s+", "_", allowed_only)
    return underscored.replace("&", "and").strip("_")
|
| 194 |
|
| 195 |
|
| 196 |
def update_conference_loader():
|
| 197 |
"""Update the conference loader file with all current conferences."""
|
| 198 |
+
conferences_dir = "src/data/conferences"
|
| 199 |
+
loader_path = "src/utils/conferenceLoader.ts"
|
| 200 |
+
|
| 201 |
# Get all conference files
|
| 202 |
conference_files = []
|
| 203 |
if os.path.exists(conferences_dir):
|
| 204 |
for filename in sorted(os.listdir(conferences_dir)):
|
| 205 |
+
if filename.endswith(".yml"):
|
| 206 |
conference_files.append(filename)
|
| 207 |
+
|
| 208 |
# Generate import statements
|
| 209 |
imports = []
|
| 210 |
variable_names = []
|
| 211 |
+
|
| 212 |
for filename in conference_files:
|
| 213 |
# Create variable name from filename
|
| 214 |
+
var_name = filename.replace(".yml", "").replace("-", "_") + "Data"
|
| 215 |
variable_names.append(var_name)
|
| 216 |
imports.append(f"import {var_name} from '@/data/conferences/{filename}';")
|
| 217 |
+
|
| 218 |
# Generate the loader file content
|
| 219 |
loader_content = f"""import {{ Conference }} from '@/types/conference';
|
| 220 |
|
|
|
|
| 223 |
|
| 224 |
// Combine all conference data into a single array
|
| 225 |
const allConferencesData: Conference[] = [
|
| 226 |
+
{chr(10).join(f" ...{var_name}," for var_name in variable_names)}
|
| 227 |
];
|
| 228 |
|
| 229 |
export default allConferencesData;"""
|
| 230 |
+
|
| 231 |
# Write the loader file
|
| 232 |
+
with open(loader_path, "w") as f:
|
| 233 |
f.write(loader_content)
|
| 234 |
+
|
| 235 |
print(f"Updated conference loader with {len(conference_files)} conference files")
|
| 236 |
|
| 237 |
|
|
|
|
| 239 |
try:
|
| 240 |
# Load current conferences from individual files
|
| 241 |
current_conference_groups = load_all_current_conferences()
|
| 242 |
+
|
| 243 |
# Fetch and transform new data
|
| 244 |
new_conferences = fetch_conference_files()
|
| 245 |
if not new_conferences:
|
| 246 |
print("Warning: No conferences fetched from ccfddl")
|
| 247 |
return
|
| 248 |
+
|
| 249 |
transformed_conferences = transform_conference_data(new_conferences)
|
| 250 |
if not transformed_conferences:
|
| 251 |
print("Warning: No conferences transformed")
|
| 252 |
return
|
| 253 |
+
|
| 254 |
# Create conferences directory if it doesn't exist
|
| 255 |
+
conferences_dir = "src/data/conferences"
|
| 256 |
os.makedirs(conferences_dir, exist_ok=True)
|
| 257 |
+
|
| 258 |
# Group new conferences by title
|
| 259 |
new_conference_groups = {}
|
| 260 |
for conf in transformed_conferences:
|
| 261 |
+
title = conf["title"]
|
| 262 |
if title not in new_conference_groups:
|
| 263 |
new_conference_groups[title] = []
|
| 264 |
new_conference_groups[title].append(conf)
|
| 265 |
+
|
| 266 |
# Update each conference group
|
| 267 |
updated_count = 0
|
| 268 |
for title, new_confs in new_conference_groups.items():
|
| 269 |
+
filename = create_filename_from_title(title) + ".yml"
|
| 270 |
filepath = os.path.join(conferences_dir, filename)
|
| 271 |
+
|
| 272 |
# Get current conferences for this title
|
| 273 |
current_confs = current_conference_groups.get(title, [])
|
| 274 |
+
current_conf_dict = {conf["id"]: conf for conf in current_confs}
|
| 275 |
+
|
| 276 |
# Update or add new conferences
|
| 277 |
for new_conf in new_confs:
|
| 278 |
+
if new_conf["id"] in current_conf_dict:
|
| 279 |
# Update existing conference while preserving fields
|
| 280 |
+
curr_conf = current_conf_dict[new_conf["id"]]
|
| 281 |
+
|
| 282 |
# Preserve existing fields
|
| 283 |
preserved_fields = [
|
| 284 |
+
"tags",
|
| 285 |
+
"venue",
|
| 286 |
+
"hindex",
|
| 287 |
+
"submission_deadline",
|
| 288 |
+
"timezone_submission",
|
| 289 |
+
"rebuttal_period_start",
|
| 290 |
+
"rebuttal_period_end",
|
| 291 |
+
"final_decision_date",
|
| 292 |
+
"review_release_date",
|
| 293 |
+
"commitment_deadline",
|
| 294 |
+
"start",
|
| 295 |
+
"end",
|
| 296 |
+
"note",
|
| 297 |
+
"city",
|
| 298 |
+
"country",
|
| 299 |
+
"deadlines",
|
| 300 |
]
|
| 301 |
for field in preserved_fields:
|
| 302 |
if field in curr_conf:
|
| 303 |
new_conf[field] = curr_conf[field]
|
| 304 |
+
|
| 305 |
# Preserve existing rankings if available
|
| 306 |
+
if "rankings" in curr_conf:
|
| 307 |
+
new_conf["rankings"] = curr_conf["rankings"]
|
| 308 |
+
|
| 309 |
+
current_conf_dict[new_conf["id"]] = new_conf
|
| 310 |
else:
|
| 311 |
# Add new conference
|
| 312 |
+
current_conf_dict[new_conf["id"]] = new_conf
|
| 313 |
+
|
| 314 |
# Convert back to list and sort by year
|
| 315 |
all_confs = list(current_conf_dict.values())
|
| 316 |
+
all_confs.sort(key=lambda x: x.get("year", 9999))
|
| 317 |
+
|
| 318 |
# Write to individual file
|
| 319 |
+
with open(filepath, "w") as f:
|
| 320 |
+
yaml.dump(
|
| 321 |
+
all_confs,
|
| 322 |
+
f,
|
| 323 |
+
default_flow_style=False,
|
| 324 |
+
sort_keys=False,
|
| 325 |
+
allow_unicode=True,
|
| 326 |
+
)
|
| 327 |
+
|
| 328 |
updated_count += 1
|
| 329 |
print(f"Updated {filename} with {len(all_confs)} entries")
|
| 330 |
+
|
| 331 |
# Update the conference loader
|
| 332 |
update_conference_loader()
|
| 333 |
+
|
| 334 |
print(f"Successfully updated {updated_count} conference files")
|
| 335 |
+
|
| 336 |
except Exception as e:
|
| 337 |
print(f"Error: {e}")
|
| 338 |
raise
|
agents/agent.py
CHANGED
|
@@ -33,7 +33,11 @@ SCRIPT_DIR = Path(__file__).parent
|
|
| 33 |
# Project root directory - use current working directory if set (for Modal),
|
| 34 |
# otherwise use parent of agents/ directory (for local development)
|
| 35 |
# This allows Modal to clone the repo and chdir to it before importing this module
|
| 36 |
-
PROJECT_ROOT =
|
|
|
|
|
|
|
|
|
|
|
|
|
| 37 |
|
| 38 |
|
| 39 |
async def read_prompt(filename: str) -> str:
|
|
@@ -52,37 +56,41 @@ async def read_app_readme() -> str:
|
|
| 52 |
|
| 53 |
async def load_conference_data(conference_name: str) -> str:
|
| 54 |
"""Load conference data from YAML file.
|
| 55 |
-
|
| 56 |
Args:
|
| 57 |
conference_name: The name of the conference (e.g., 'neurips', 'aaai')
|
| 58 |
-
|
| 59 |
Returns:
|
| 60 |
The YAML content as a string, or an empty string if file not found.
|
| 61 |
"""
|
| 62 |
yaml_path = PROJECT_ROOT / "src" / "data" / "conferences" / f"{conference_name}.yml"
|
| 63 |
-
|
| 64 |
if not yaml_path.exists():
|
| 65 |
print(f"Warning: Conference file not found at {yaml_path}")
|
| 66 |
return ""
|
| 67 |
-
|
| 68 |
async with aiofiles.open(yaml_path, "r", encoding="utf-8") as f:
|
| 69 |
return await f.read()
|
| 70 |
|
| 71 |
|
| 72 |
-
def format_user_prompt(
|
|
|
|
|
|
|
| 73 |
"""Format the user prompt template with conference name and data.
|
| 74 |
-
|
| 75 |
Args:
|
| 76 |
template: The user prompt template with placeholders.
|
| 77 |
conference_name: The name of the conference.
|
| 78 |
conference_data: The YAML content of the conference data.
|
| 79 |
-
|
| 80 |
Returns:
|
| 81 |
The formatted user prompt.
|
| 82 |
"""
|
| 83 |
return template.format(
|
| 84 |
conference_name=conference_name,
|
| 85 |
-
conference_data=conference_data
|
|
|
|
|
|
|
| 86 |
)
|
| 87 |
|
| 88 |
|
|
@@ -96,19 +104,22 @@ async def find_conference_deadlines(conference_name: str) -> None:
|
|
| 96 |
|
| 97 |
# Load conference data from YAML file
|
| 98 |
conference_data = await load_conference_data(conference_name)
|
| 99 |
-
|
| 100 |
# Read app README for system prompt
|
| 101 |
app_readme = await read_app_readme()
|
| 102 |
-
|
| 103 |
# Read and format system prompt
|
| 104 |
system_prompt_template = await read_prompt("prompts/system_prompt.md")
|
| 105 |
-
from datetime import datetime
|
| 106 |
|
| 107 |
def format_date_verbose(dt: datetime) -> str:
|
| 108 |
# e.g. "Monday, the 1st of April, 2025"
|
| 109 |
day = dt.day
|
| 110 |
-
suffix =
|
| 111 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
| 112 |
|
| 113 |
system_prompt = system_prompt_template.format(
|
| 114 |
conference_name=conference_name,
|
|
@@ -130,14 +141,14 @@ async def find_conference_deadlines(conference_name: str) -> None:
|
|
| 130 |
# Fallback to home directory (for Modal non-root user)
|
| 131 |
settings_path = Path.home() / ".claude" / "settings.local.json"
|
| 132 |
settings_path = str(settings_path)
|
| 133 |
-
|
| 134 |
# Configure Exa MCP server for web search capabilities (only if API key is available)
|
| 135 |
# See: https://docs.exa.ai/reference/exa-mcp
|
| 136 |
# Note: On Modal, MCP causes claude-agent-sdk to exit early, so we disable it there
|
| 137 |
exa_api_key = os.environ.get("EXA_API_KEY", "")
|
| 138 |
disable_mcp = os.environ.get("DISABLE_EXA_MCP", "").lower() in ("1", "true", "yes")
|
| 139 |
mcp_servers: dict[str, McpHttpServerConfig] = {}
|
| 140 |
-
|
| 141 |
if disable_mcp:
|
| 142 |
print("Exa MCP disabled via DISABLE_EXA_MCP environment variable")
|
| 143 |
print("Using built-in WebSearch tool instead")
|
|
@@ -150,7 +161,7 @@ async def find_conference_deadlines(conference_name: str) -> None:
|
|
| 150 |
)
|
| 151 |
else:
|
| 152 |
print("EXA_API_KEY not found, Exa MCP will not be available")
|
| 153 |
-
|
| 154 |
# Only pass mcp_servers if we have any configured
|
| 155 |
# Passing empty dict or MCP servers can cause issues in some environments
|
| 156 |
options_kwargs = {
|
|
@@ -163,7 +174,7 @@ async def find_conference_deadlines(conference_name: str) -> None:
|
|
| 163 |
print(f"Configuring with MCP servers: {list(mcp_servers.keys())}")
|
| 164 |
else:
|
| 165 |
print("No MCP servers configured, using built-in tools only")
|
| 166 |
-
|
| 167 |
options = ClaudeAgentOptions(**options_kwargs)
|
| 168 |
|
| 169 |
# Run the agent query
|
|
@@ -216,14 +227,18 @@ async def find_conference_deadlines(conference_name: str) -> None:
|
|
| 216 |
# Get the tool name from our tracking dict
|
| 217 |
tool_name = tool_names.get(block.tool_use_id, "unknown")
|
| 218 |
# Truncate long results for readability
|
| 219 |
-
content_str =
|
|
|
|
|
|
|
| 220 |
if len(content_str) > 500:
|
| 221 |
content_str = content_str[:500] + "... (truncated)"
|
| 222 |
error_indicator = " [ERROR]" if block.is_error else ""
|
| 223 |
-
print(
|
|
|
|
|
|
|
| 224 |
elif isinstance(message, ResultMessage):
|
| 225 |
# Print result details
|
| 226 |
-
if hasattr(message,
|
| 227 |
print(f"[result] ERROR: {message.error}")
|
| 228 |
if message.total_cost_usd and message.total_cost_usd > 0:
|
| 229 |
print(f"\nCost: ${message.total_cost_usd:.4f}")
|
|
@@ -249,4 +264,4 @@ if __name__ == "__main__":
|
|
| 249 |
args = parser.parse_args()
|
| 250 |
conference_name = args.conference_name
|
| 251 |
|
| 252 |
-
asyncio.run(find_conference_deadlines(conference_name))
|
|
|
|
| 33 |
# Project root directory - use current working directory if set (for Modal),
|
| 34 |
# otherwise use parent of agents/ directory (for local development)
|
| 35 |
# This allows Modal to clone the repo and chdir to it before importing this module
|
| 36 |
+
PROJECT_ROOT = (
|
| 37 |
+
Path(os.getcwd())
|
| 38 |
+
if os.environ.get("USE_CWD_AS_PROJECT_ROOT")
|
| 39 |
+
else SCRIPT_DIR.parent
|
| 40 |
+
)
|
| 41 |
|
| 42 |
|
| 43 |
async def read_prompt(filename: str) -> str:
|
|
|
|
| 56 |
|
| 57 |
async def load_conference_data(conference_name: str) -> str:
    """Read the raw YAML text for one conference.

    Args:
        conference_name: The name of the conference (e.g., 'neurips', 'aaai');
            used as the file stem under ``src/data/conferences``.

    Returns:
        The file's contents as a string, or an empty string (after printing a
        warning) when the file does not exist.
    """
    conferences_dir = PROJECT_ROOT / "src" / "data" / "conferences"
    yaml_path = conferences_dir / f"{conference_name}.yml"

    if yaml_path.exists():
        async with aiofiles.open(yaml_path, "r", encoding="utf-8") as handle:
            return await handle.read()

    print(f"Warning: Conference file not found at {yaml_path}")
    return ""
|
| 74 |
|
| 75 |
|
| 76 |
+
def format_user_prompt(
    template: str, conference_name: str, conference_data: str
) -> str:
    """Fill the user prompt template with the conference name and data.

    Args:
        template: Prompt text containing ``{conference_name}`` and
            ``{conference_data}`` placeholders.
        conference_name: The name of the conference.
        conference_data: The YAML content of the conference data; when empty,
            a fixed "no data" notice is substituted instead.

    Returns:
        The formatted user prompt.
    """
    data_section = conference_data
    if not data_section:
        data_section = "No existing data found."

    return template.format(
        conference_name=conference_name,
        conference_data=data_section,
    )
|
| 95 |
|
| 96 |
|
|
|
|
| 104 |
|
| 105 |
# Load conference data from YAML file
|
| 106 |
conference_data = await load_conference_data(conference_name)
|
| 107 |
+
|
| 108 |
# Read app README for system prompt
|
| 109 |
app_readme = await read_app_readme()
|
| 110 |
+
|
| 111 |
# Read and format system prompt
|
| 112 |
system_prompt_template = await read_prompt("prompts/system_prompt.md")
|
|
|
|
| 113 |
|
| 114 |
def format_date_verbose(dt: datetime) -> str:
|
| 115 |
# e.g. "Monday, the 1st of April, 2025"
|
| 116 |
day = dt.day
|
| 117 |
+
suffix = (
|
| 118 |
+
"th" if 11 <= day <= 13 else {1: "st", 2: "nd", 3: "rd"}.get(day % 10, "th")
|
| 119 |
+
)
|
| 120 |
+
return (
|
| 121 |
+
f"{dt.strftime('%A')}, the {day}{suffix} of {dt.strftime('%B')}, {dt.year}"
|
| 122 |
+
)
|
| 123 |
|
| 124 |
system_prompt = system_prompt_template.format(
|
| 125 |
conference_name=conference_name,
|
|
|
|
| 141 |
# Fallback to home directory (for Modal non-root user)
|
| 142 |
settings_path = Path.home() / ".claude" / "settings.local.json"
|
| 143 |
settings_path = str(settings_path)
|
| 144 |
+
|
| 145 |
# Configure Exa MCP server for web search capabilities (only if API key is available)
|
| 146 |
# See: https://docs.exa.ai/reference/exa-mcp
|
| 147 |
# Note: On Modal, MCP causes claude-agent-sdk to exit early, so we disable it there
|
| 148 |
exa_api_key = os.environ.get("EXA_API_KEY", "")
|
| 149 |
disable_mcp = os.environ.get("DISABLE_EXA_MCP", "").lower() in ("1", "true", "yes")
|
| 150 |
mcp_servers: dict[str, McpHttpServerConfig] = {}
|
| 151 |
+
|
| 152 |
if disable_mcp:
|
| 153 |
print("Exa MCP disabled via DISABLE_EXA_MCP environment variable")
|
| 154 |
print("Using built-in WebSearch tool instead")
|
|
|
|
| 161 |
)
|
| 162 |
else:
|
| 163 |
print("EXA_API_KEY not found, Exa MCP will not be available")
|
| 164 |
+
|
| 165 |
# Only pass mcp_servers if we have any configured
|
| 166 |
# Passing empty dict or MCP servers can cause issues in some environments
|
| 167 |
options_kwargs = {
|
|
|
|
| 174 |
print(f"Configuring with MCP servers: {list(mcp_servers.keys())}")
|
| 175 |
else:
|
| 176 |
print("No MCP servers configured, using built-in tools only")
|
| 177 |
+
|
| 178 |
options = ClaudeAgentOptions(**options_kwargs)
|
| 179 |
|
| 180 |
# Run the agent query
|
|
|
|
| 227 |
# Get the tool name from our tracking dict
|
| 228 |
tool_name = tool_names.get(block.tool_use_id, "unknown")
|
| 229 |
# Truncate long results for readability
|
| 230 |
+
content_str = (
|
| 231 |
+
str(block.content) if block.content else "(empty)"
|
| 232 |
+
)
|
| 233 |
if len(content_str) > 500:
|
| 234 |
content_str = content_str[:500] + "... (truncated)"
|
| 235 |
error_indicator = " [ERROR]" if block.is_error else ""
|
| 236 |
+
print(
|
| 237 |
+
f"[result]{error_indicator} {tool_name}: {content_str}"
|
| 238 |
+
)
|
| 239 |
elif isinstance(message, ResultMessage):
|
| 240 |
# Print result details
|
| 241 |
+
if hasattr(message, "error") and message.error:
|
| 242 |
print(f"[result] ERROR: {message.error}")
|
| 243 |
if message.total_cost_usd and message.total_cost_usd > 0:
|
| 244 |
print(f"\nCost: ${message.total_cost_usd:.4f}")
|
|
|
|
| 264 |
args = parser.parse_args()
|
| 265 |
conference_name = args.conference_name
|
| 266 |
|
| 267 |
+
asyncio.run(find_conference_deadlines(conference_name))
|
agents/modal_agent.py
CHANGED
|
@@ -43,22 +43,21 @@ CONFERENCES_DIR = "src/data/conferences"
|
|
| 43 |
|
| 44 |
def get_conferences(base_dir: str = REPO_DIR) -> list[str]:
|
| 45 |
"""Get list of all conferences by reading yml files from the conferences directory.
|
| 46 |
-
|
| 47 |
Args:
|
| 48 |
base_dir: Base directory of the repository.
|
| 49 |
-
|
| 50 |
Returns:
|
| 51 |
Sorted list of conference names (yml filenames without extension).
|
| 52 |
"""
|
| 53 |
conferences_path = Path(base_dir) / CONFERENCES_DIR
|
| 54 |
if not conferences_path.exists():
|
| 55 |
raise FileNotFoundError(f"Conferences directory not found: {conferences_path}")
|
| 56 |
-
|
| 57 |
-
conferences = [
|
| 58 |
-
f.stem for f in conferences_path.glob("*.yml")
|
| 59 |
-
]
|
| 60 |
return sorted(conferences)
|
| 61 |
|
|
|
|
| 62 |
# Define the Modal image with all required dependencies
|
| 63 |
image = (
|
| 64 |
modal.Image.debian_slim(python_version="3.11")
|
|
@@ -160,7 +159,7 @@ def setup_git_and_clone():
|
|
| 160 |
@app.function(timeout=600)
|
| 161 |
def process_single_conference(conference_name: str) -> dict:
|
| 162 |
"""Process a single conference using the Claude Agent SDK.
|
| 163 |
-
|
| 164 |
The agent will update the conference data and handle git add/commit/push.
|
| 165 |
|
| 166 |
Args:
|
|
@@ -181,10 +180,10 @@ def process_single_conference(conference_name: str) -> dict:
|
|
| 181 |
os.environ["HOME"] = agent_user.pw_dir
|
| 182 |
os.environ["USER"] = "agent"
|
| 183 |
os.environ["LOGNAME"] = "agent"
|
| 184 |
-
|
| 185 |
# Ensure subprocess inherits correct user context
|
| 186 |
os.environ["SHELL"] = "/bin/bash"
|
| 187 |
-
|
| 188 |
# Disable MCP for now - known issue where MCP causes SDK to exit early on Modal
|
| 189 |
# The agent will use built-in WebSearch tool instead
|
| 190 |
# See MODAL_DEBUGGING.md for details
|
|
@@ -200,7 +199,7 @@ def process_single_conference(conference_name: str) -> dict:
|
|
| 200 |
|
| 201 |
# Change to repo directory so relative paths work
|
| 202 |
os.chdir(REPO_DIR)
|
| 203 |
-
|
| 204 |
# Tell agent.py to use current working directory as PROJECT_ROOT
|
| 205 |
# This ensures conference data is read from the cloned repo, not the mounted app directory
|
| 206 |
os.environ["USE_CWD_AS_PROJECT_ROOT"] = "1"
|
|
@@ -228,7 +227,7 @@ def process_single_conference(conference_name: str) -> dict:
|
|
| 228 |
@app.function(timeout=43200) # 12 hours max for all conferences
|
| 229 |
def process_all_conferences() -> list[dict]:
|
| 230 |
"""Process all conferences sequentially.
|
| 231 |
-
|
| 232 |
Each conference is processed one at a time. The agent handles
|
| 233 |
git add/commit/push for each conference via its Bash tool.
|
| 234 |
|
|
@@ -236,16 +235,16 @@ def process_all_conferences() -> list[dict]:
|
|
| 236 |
List of results for each processed conference.
|
| 237 |
"""
|
| 238 |
import pwd
|
| 239 |
-
|
| 240 |
# Switch to non-root user (required for git operations)
|
| 241 |
agent_user = pwd.getpwnam("agent")
|
| 242 |
os.setgid(agent_user.pw_gid)
|
| 243 |
os.setuid(agent_user.pw_uid)
|
| 244 |
os.environ["HOME"] = agent_user.pw_dir
|
| 245 |
-
|
| 246 |
# Clone repo first to get the list of conferences
|
| 247 |
setup_git_and_clone()
|
| 248 |
-
|
| 249 |
# Get conferences from yml files in the cloned repo
|
| 250 |
conferences = get_conferences()
|
| 251 |
results = []
|
|
@@ -263,11 +262,13 @@ def process_all_conferences() -> list[dict]:
|
|
| 263 |
|
| 264 |
except Exception as e:
|
| 265 |
print(f"Error processing {conference}: {e}")
|
| 266 |
-
results.append(
|
| 267 |
-
|
| 268 |
-
|
| 269 |
-
|
| 270 |
-
|
|
|
|
|
|
|
| 271 |
|
| 272 |
print(f"\n{'=' * 60}")
|
| 273 |
print(f"Completed processing {len(conferences)} conferences")
|
|
@@ -284,15 +285,15 @@ def scheduled_run():
|
|
| 284 |
"""Scheduled weekly run of all conferences."""
|
| 285 |
print("Starting scheduled weekly conference update...")
|
| 286 |
results = process_all_conferences.remote()
|
| 287 |
-
|
| 288 |
# Summary
|
| 289 |
completed = sum(1 for r in results if r.get("status") == "completed")
|
| 290 |
errors = sum(1 for r in results if r.get("status") == "error")
|
| 291 |
-
|
| 292 |
-
print(
|
| 293 |
print(f" - Completed: {completed}")
|
| 294 |
print(f" - Errors: {errors}")
|
| 295 |
-
|
| 296 |
return results
|
| 297 |
|
| 298 |
|
|
@@ -335,13 +336,13 @@ def main(
|
|
| 335 |
print(f"\n{'=' * 60}")
|
| 336 |
print("Summary:")
|
| 337 |
print(f"{'=' * 60}")
|
| 338 |
-
|
| 339 |
completed = [r for r in results if r.get("status") == "completed"]
|
| 340 |
errors = [r for r in results if r.get("status") == "error"]
|
| 341 |
-
|
| 342 |
print(f"Completed: {len(completed)}")
|
| 343 |
print(f"Errors: {len(errors)}")
|
| 344 |
-
|
| 345 |
if errors:
|
| 346 |
print("\nErrors:")
|
| 347 |
for r in errors:
|
|
|
|
| 43 |
|
| 44 |
def get_conferences(base_dir: str = REPO_DIR) -> list[str]:
    """List every conference that has a yml file in the conferences directory.

    Args:
        base_dir: Base directory of the repository.

    Returns:
        Sorted list of conference names (yml filenames without extension).

    Raises:
        FileNotFoundError: If the conferences directory does not exist.
    """
    conferences_path = Path(base_dir) / CONFERENCES_DIR
    if conferences_path.exists():
        return sorted(yml_file.stem for yml_file in conferences_path.glob("*.yml"))

    raise FileNotFoundError(f"Conferences directory not found: {conferences_path}")
|
| 59 |
|
| 60 |
+
|
| 61 |
# Define the Modal image with all required dependencies
|
| 62 |
image = (
|
| 63 |
modal.Image.debian_slim(python_version="3.11")
|
|
|
|
| 159 |
@app.function(timeout=600)
|
| 160 |
def process_single_conference(conference_name: str) -> dict:
|
| 161 |
"""Process a single conference using the Claude Agent SDK.
|
| 162 |
+
|
| 163 |
The agent will update the conference data and handle git add/commit/push.
|
| 164 |
|
| 165 |
Args:
|
|
|
|
| 180 |
os.environ["HOME"] = agent_user.pw_dir
|
| 181 |
os.environ["USER"] = "agent"
|
| 182 |
os.environ["LOGNAME"] = "agent"
|
| 183 |
+
|
| 184 |
# Ensure subprocess inherits correct user context
|
| 185 |
os.environ["SHELL"] = "/bin/bash"
|
| 186 |
+
|
| 187 |
# Disable MCP for now - known issue where MCP causes SDK to exit early on Modal
|
| 188 |
# The agent will use built-in WebSearch tool instead
|
| 189 |
# See MODAL_DEBUGGING.md for details
|
|
|
|
| 199 |
|
| 200 |
# Change to repo directory so relative paths work
|
| 201 |
os.chdir(REPO_DIR)
|
| 202 |
+
|
| 203 |
# Tell agent.py to use current working directory as PROJECT_ROOT
|
| 204 |
# This ensures conference data is read from the cloned repo, not the mounted app directory
|
| 205 |
os.environ["USE_CWD_AS_PROJECT_ROOT"] = "1"
|
|
|
|
| 227 |
@app.function(timeout=43200) # 12 hours max for all conferences
|
| 228 |
def process_all_conferences() -> list[dict]:
|
| 229 |
"""Process all conferences sequentially.
|
| 230 |
+
|
| 231 |
Each conference is processed one at a time. The agent handles
|
| 232 |
git add/commit/push for each conference via its Bash tool.
|
| 233 |
|
|
|
|
| 235 |
List of results for each processed conference.
|
| 236 |
"""
|
| 237 |
import pwd
|
| 238 |
+
|
| 239 |
# Switch to non-root user (required for git operations)
|
| 240 |
agent_user = pwd.getpwnam("agent")
|
| 241 |
os.setgid(agent_user.pw_gid)
|
| 242 |
os.setuid(agent_user.pw_uid)
|
| 243 |
os.environ["HOME"] = agent_user.pw_dir
|
| 244 |
+
|
| 245 |
# Clone repo first to get the list of conferences
|
| 246 |
setup_git_and_clone()
|
| 247 |
+
|
| 248 |
# Get conferences from yml files in the cloned repo
|
| 249 |
conferences = get_conferences()
|
| 250 |
results = []
|
|
|
|
| 262 |
|
| 263 |
except Exception as e:
|
| 264 |
print(f"Error processing {conference}: {e}")
|
| 265 |
+
results.append(
|
| 266 |
+
{
|
| 267 |
+
"conference": conference,
|
| 268 |
+
"status": "error",
|
| 269 |
+
"error": str(e),
|
| 270 |
+
}
|
| 271 |
+
)
|
| 272 |
|
| 273 |
print(f"\n{'=' * 60}")
|
| 274 |
print(f"Completed processing {len(conferences)} conferences")
|
|
|
|
| 285 |
"""Scheduled weekly run of all conferences."""
|
| 286 |
print("Starting scheduled weekly conference update...")
|
| 287 |
results = process_all_conferences.remote()
|
| 288 |
+
|
| 289 |
# Summary
|
| 290 |
completed = sum(1 for r in results if r.get("status") == "completed")
|
| 291 |
errors = sum(1 for r in results if r.get("status") == "error")
|
| 292 |
+
|
| 293 |
+
print("\nWeekly run completed:")
|
| 294 |
print(f" - Completed: {completed}")
|
| 295 |
print(f" - Errors: {errors}")
|
| 296 |
+
|
| 297 |
return results
|
| 298 |
|
| 299 |
|
|
|
|
| 336 |
print(f"\n{'=' * 60}")
|
| 337 |
print("Summary:")
|
| 338 |
print(f"{'=' * 60}")
|
| 339 |
+
|
| 340 |
completed = [r for r in results if r.get("status") == "completed"]
|
| 341 |
errors = [r for r in results if r.get("status") == "error"]
|
| 342 |
+
|
| 343 |
print(f"Completed: {len(completed)}")
|
| 344 |
print(f"Errors: {len(errors)}")
|
| 345 |
+
|
| 346 |
if errors:
|
| 347 |
print("\nErrors:")
|
| 348 |
for r in errors:
|
pyproject.toml
CHANGED
|
@@ -7,4 +7,5 @@ requires-python = ">=3.12"
|
|
| 7 |
dependencies = [
|
| 8 |
"aiofiles>=25.1.0",
|
| 9 |
"claude-agent-sdk>=0.1.18",
|
|
|
|
| 10 |
]
|
|
|
|
| 7 |
dependencies = [
|
| 8 |
"aiofiles>=25.1.0",
|
| 9 |
"claude-agent-sdk>=0.1.18",
|
| 10 |
+
"ruff>=0.14.10",
|
| 11 |
]
|
uv.lock
CHANGED
|
@@ -9,12 +9,14 @@ source = { virtual = "." }
|
|
| 9 |
dependencies = [
|
| 10 |
{ name = "aiofiles" },
|
| 11 |
{ name = "claude-agent-sdk" },
|
|
|
|
| 12 |
]
|
| 13 |
|
| 14 |
[package.metadata]
|
| 15 |
requires-dist = [
|
| 16 |
{ name = "aiofiles", specifier = ">=25.1.0" },
|
| 17 |
{ name = "claude-agent-sdk", specifier = ">=0.1.18" },
|
|
|
|
| 18 |
]
|
| 19 |
|
| 20 |
[[package]]
|
|
@@ -571,6 +573,32 @@ wheels = [
|
|
| 571 |
{ url = "https://files.pythonhosted.org/packages/d0/02/fa464cdfbe6b26e0600b62c528b72d8608f5cc49f96b8d6e38c95d60c676/rpds_py-0.30.0-cp314-cp314t-win_amd64.whl", hash = "sha256:27f4b0e92de5bfbc6f86e43959e6edd1425c33b5e69aab0984a72047f2bcf1e3", size = 226532 },
|
| 572 |
]
|
| 573 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 574 |
[[package]]
|
| 575 |
name = "sse-starlette"
|
| 576 |
version = "3.0.4"
|
|
|
|
| 9 |
dependencies = [
|
| 10 |
{ name = "aiofiles" },
|
| 11 |
{ name = "claude-agent-sdk" },
|
| 12 |
+
{ name = "ruff" },
|
| 13 |
]
|
| 14 |
|
| 15 |
[package.metadata]
|
| 16 |
requires-dist = [
|
| 17 |
{ name = "aiofiles", specifier = ">=25.1.0" },
|
| 18 |
{ name = "claude-agent-sdk", specifier = ">=0.1.18" },
|
| 19 |
+
{ name = "ruff", specifier = ">=0.14.10" },
|
| 20 |
]
|
| 21 |
|
| 22 |
[[package]]
|
|
|
|
| 573 |
{ url = "https://files.pythonhosted.org/packages/d0/02/fa464cdfbe6b26e0600b62c528b72d8608f5cc49f96b8d6e38c95d60c676/rpds_py-0.30.0-cp314-cp314t-win_amd64.whl", hash = "sha256:27f4b0e92de5bfbc6f86e43959e6edd1425c33b5e69aab0984a72047f2bcf1e3", size = 226532 },
|
| 574 |
]
|
| 575 |
|
| 576 |
+
[[package]]
|
| 577 |
+
name = "ruff"
|
| 578 |
+
version = "0.14.10"
|
| 579 |
+
source = { registry = "https://pypi.org/simple" }
|
| 580 |
+
sdist = { url = "https://files.pythonhosted.org/packages/57/08/52232a877978dd8f9cf2aeddce3e611b40a63287dfca29b6b8da791f5e8d/ruff-0.14.10.tar.gz", hash = "sha256:9a2e830f075d1a42cd28420d7809ace390832a490ed0966fe373ba288e77aaf4", size = 5859763 }
|
| 581 |
+
wheels = [
|
| 582 |
+
{ url = "https://files.pythonhosted.org/packages/60/01/933704d69f3f05ee16ef11406b78881733c186fe14b6a46b05cfcaf6d3b2/ruff-0.14.10-py3-none-linux_armv6l.whl", hash = "sha256:7a3ce585f2ade3e1f29ec1b92df13e3da262178df8c8bdf876f48fa0e8316c49", size = 13527080 },
|
| 583 |
+
{ url = "https://files.pythonhosted.org/packages/df/58/a0349197a7dfa603ffb7f5b0470391efa79ddc327c1e29c4851e85b09cc5/ruff-0.14.10-py3-none-macosx_10_12_x86_64.whl", hash = "sha256:674f9be9372907f7257c51f1d4fc902cb7cf014b9980152b802794317941f08f", size = 13797320 },
|
| 584 |
+
{ url = "https://files.pythonhosted.org/packages/7b/82/36be59f00a6082e38c23536df4e71cdbc6af8d7c707eade97fcad5c98235/ruff-0.14.10-py3-none-macosx_11_0_arm64.whl", hash = "sha256:d85713d522348837ef9df8efca33ccb8bd6fcfc86a2cde3ccb4bc9d28a18003d", size = 12918434 },
|
| 585 |
+
{ url = "https://files.pythonhosted.org/packages/a6/00/45c62a7f7e34da92a25804f813ebe05c88aa9e0c25e5cb5a7d23dd7450e3/ruff-0.14.10-py3-none-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:6987ebe0501ae4f4308d7d24e2d0fe3d7a98430f5adfd0f1fead050a740a3a77", size = 13371961 },
|
| 586 |
+
{ url = "https://files.pythonhosted.org/packages/40/31/a5906d60f0405f7e57045a70f2d57084a93ca7425f22e1d66904769d1628/ruff-0.14.10-py3-none-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:16a01dfb7b9e4eee556fbfd5392806b1b8550c9b4a9f6acd3dbe6812b193c70a", size = 13275629 },
|
| 587 |
+
{ url = "https://files.pythonhosted.org/packages/3e/60/61c0087df21894cf9d928dc04bcd4fb10e8b2e8dca7b1a276ba2155b2002/ruff-0.14.10-py3-none-manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:7165d31a925b7a294465fa81be8c12a0e9b60fb02bf177e79067c867e71f8b1f", size = 14029234 },
|
| 588 |
+
{ url = "https://files.pythonhosted.org/packages/44/84/77d911bee3b92348b6e5dab5a0c898d87084ea03ac5dc708f46d88407def/ruff-0.14.10-py3-none-manylinux_2_17_ppc64.manylinux2014_ppc64.whl", hash = "sha256:c561695675b972effb0c0a45db233f2c816ff3da8dcfbe7dfc7eed625f218935", size = 15449890 },
|
| 589 |
+
{ url = "https://files.pythonhosted.org/packages/e9/36/480206eaefa24a7ec321582dda580443a8f0671fdbf6b1c80e9c3e93a16a/ruff-0.14.10-py3-none-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:4bb98fcbbc61725968893682fd4df8966a34611239c9fd07a1f6a07e7103d08e", size = 15123172 },
|
| 590 |
+
{ url = "https://files.pythonhosted.org/packages/5c/38/68e414156015ba80cef5473d57919d27dfb62ec804b96180bafdeaf0e090/ruff-0.14.10-py3-none-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:f24b47993a9d8cb858429e97bdf8544c78029f09b520af615c1d261bf827001d", size = 14460260 },
|
| 591 |
+
{ url = "https://files.pythonhosted.org/packages/b3/19/9e050c0dca8aba824d67cc0db69fb459c28d8cd3f6855b1405b3f29cc91d/ruff-0.14.10-py3-none-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:59aabd2e2c4fd614d2862e7939c34a532c04f1084476d6833dddef4afab87e9f", size = 14229978 },
|
| 592 |
+
{ url = "https://files.pythonhosted.org/packages/51/eb/e8dd1dd6e05b9e695aa9dd420f4577debdd0f87a5ff2fedda33c09e9be8c/ruff-0.14.10-py3-none-manylinux_2_31_riscv64.whl", hash = "sha256:213db2b2e44be8625002dbea33bb9c60c66ea2c07c084a00d55732689d697a7f", size = 14338036 },
|
| 593 |
+
{ url = "https://files.pythonhosted.org/packages/6a/12/f3e3a505db7c19303b70af370d137795fcfec136d670d5de5391e295c134/ruff-0.14.10-py3-none-musllinux_1_2_aarch64.whl", hash = "sha256:b914c40ab64865a17a9a5b67911d14df72346a634527240039eb3bd650e5979d", size = 13264051 },
|
| 594 |
+
{ url = "https://files.pythonhosted.org/packages/08/64/8c3a47eaccfef8ac20e0484e68e0772013eb85802f8a9f7603ca751eb166/ruff-0.14.10-py3-none-musllinux_1_2_armv7l.whl", hash = "sha256:1484983559f026788e3a5c07c81ef7d1e97c1c78ed03041a18f75df104c45405", size = 13283998 },
|
| 595 |
+
{ url = "https://files.pythonhosted.org/packages/12/84/534a5506f4074e5cc0529e5cd96cfc01bb480e460c7edf5af70d2bcae55e/ruff-0.14.10-py3-none-musllinux_1_2_i686.whl", hash = "sha256:c70427132db492d25f982fffc8d6c7535cc2fd2c83fc8888f05caaa248521e60", size = 13601891 },
|
| 596 |
+
{ url = "https://files.pythonhosted.org/packages/0d/1e/14c916087d8598917dbad9b2921d340f7884824ad6e9c55de948a93b106d/ruff-0.14.10-py3-none-musllinux_1_2_x86_64.whl", hash = "sha256:5bcf45b681e9f1ee6445d317ce1fa9d6cba9a6049542d1c3d5b5958986be8830", size = 14336660 },
|
| 597 |
+
{ url = "https://files.pythonhosted.org/packages/f2/1c/d7b67ab43f30013b47c12b42d1acd354c195351a3f7a1d67f59e54227ede/ruff-0.14.10-py3-none-win32.whl", hash = "sha256:104c49fc7ab73f3f3a758039adea978869a918f31b73280db175b43a2d9b51d6", size = 13196187 },
|
| 598 |
+
{ url = "https://files.pythonhosted.org/packages/fb/9c/896c862e13886fae2af961bef3e6312db9ebc6adc2b156fe95e615dee8c1/ruff-0.14.10-py3-none-win_amd64.whl", hash = "sha256:466297bd73638c6bdf06485683e812db1c00c7ac96d4ddd0294a338c62fdc154", size = 14661283 },
|
| 599 |
+
{ url = "https://files.pythonhosted.org/packages/74/31/b0e29d572670dca3674eeee78e418f20bdf97fa8aa9ea71380885e175ca0/ruff-0.14.10-py3-none-win_arm64.whl", hash = "sha256:e51d046cf6dda98a4633b8a8a771451107413b0f07183b2bef03f075599e44e6", size = 13729839 },
|
| 600 |
+
]
|
| 601 |
+
|
| 602 |
[[package]]
|
| 603 |
name = "sse-starlette"
|
| 604 |
version = "3.0.4"
|