Spaces:
Running
Running
Upload historical_sessions.py
Browse files- historical_sessions.py +408 -0
historical_sessions.py
ADDED
|
@@ -0,0 +1,408 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import json
|
| 2 |
+
import os
|
| 3 |
+
from datetime import date, datetime, time as dt_time, timedelta
|
| 4 |
+
from pathlib import Path
|
| 5 |
+
from typing import Iterable, List, Optional
|
| 6 |
+
|
| 7 |
+
import numpy as np
|
| 8 |
+
import pandas as pd
|
| 9 |
+
|
| 10 |
+
from cinema_api_client import fetch_canonical_movie_names
|
| 11 |
+
|
| 12 |
+
|
| 13 |
+
# Directory that contains this module; every persisted file is located relative to it.
ROOT_DIR = Path(__file__).resolve().parent
# Cache directory holding the history CSV and its manifest (created by ensure_state_dir()).
STATE_DIR = ROOT_DIR / "cinema_cache"
# CSV file that save_history_df() writes and load_history_df() reads first.
LOCAL_HISTORY_FILE = STATE_DIR / "historical_sessions.csv"
# JSON manifest read and written by load_history_manifest() / save_history_manifest().
LOCAL_HISTORY_MANIFEST_FILE = STATE_DIR / "historical_sessions_manifest.json"
# Older CSV next to the module; load_history_df() imports it only when the local history file is absent.
LEGACY_HISTORY_FILE = ROOT_DIR / "persistent_data.csv"
|
| 18 |
+
|
| 19 |
+
# Column names, in output order, of every history frame returned by this module.
# The English glosses are translations of the column names only.
HISTORY_COLUMNS = [
    "showId",
    "影片名称",  # movie title
    "影片名称_清理后",  # cleaned movie title
    "放映日期",  # show date
    "放映时间",  # show time
    "影厅",  # hall
    "座位数",  # seat count
    "总收入",  # total revenue
    "总人次",  # total admissions
    "场次",  # number of sessions
    "影片时长(分钟)",  # movie duration (minutes)
    "影片时长档位",  # duration bucket
    "影片时长类型",  # duration label
    "影片编码",  # movie code
    "影片语言",  # movie language
    "影片制式",  # movie format
]
|
| 37 |
+
|
| 38 |
+
|
| 39 |
+
def ensure_state_dir() -> None:
    """Make sure the cache directory exists, creating missing parents as needed."""
    if STATE_DIR.is_dir():
        return
    # exist_ok keeps this quiet if the directory appears between the check and the call.
    STATE_DIR.mkdir(parents=True, exist_ok=True)
|
| 41 |
+
|
| 42 |
+
|
| 43 |
+
def clean_movie_title(raw_title, canonical_names=None):
    """Reduce a raw listing title to a base name plus a normalized format suffix.

    Args:
        raw_title: Title as it appears in the listing. Non-string values are
            returned unchanged.
        canonical_names: Optional iterable of known movie names. The longest
            name contained in ``raw_title`` becomes the base name. Entries that
            are not non-empty strings are ignored.

    Returns:
        The base name, followed by a format suffix such as ``(数字3D)`` when a
        format keyword is found in the title and the suffix is not already part
        of the base name.
    """
    if not isinstance(raw_title, str):
        return raw_title

    base_name = None
    if canonical_names:
        # Skip unusable entries instead of crashing in len() or the "in" test.
        usable_names = [name for name in canonical_names if isinstance(name, str) and name]
        # Longest first, so a longer name wins over a shorter name it contains.
        for name in sorted(usable_names, key=len, reverse=True):
            if name in raw_title:
                base_name = name
                break

    if not base_name:
        # Strip before splitting: with a leading space the first token would be
        # empty and the result would consist of the suffix alone.
        base_name = raw_title.strip().split(" ", 1)[0]

    # The order of the checks is the precedence between formats.
    raw_upper = raw_title.upper()
    suffix = ""
    if "HDR LED" in raw_upper:
        suffix = "(HDR LED)"
    elif "CINITY" in raw_upper:
        suffix = "(CINITY)"
    elif "杜比" in raw_upper or "DOLBY" in raw_upper:
        suffix = "(杜比视界)"
    elif "IMAX" in raw_upper:
        suffix = "(数字IMAX3D)" if "3D" in raw_upper else "(数字IMAX)"
    elif "巨幕" in raw_upper:
        suffix = "(中国巨幕立体)" if "立体" in raw_upper else "(中国巨幕)"
    elif "3D" in raw_upper:
        suffix = "(数字3D)"

    if suffix and suffix not in base_name:
        return f"{base_name}{suffix}"
    return base_name
|
| 76 |
+
|
| 77 |
+
|
| 78 |
+
def round_minutes_to_10min(minutes):
    """Round a duration in minutes to the nearest multiple of ten.

    Values ending in 5 round up. Anything that cannot be read as a number, or
    that is zero or negative, yields ``np.nan``; otherwise an ``int`` is returned.
    """
    parsed = pd.to_numeric(minutes, errors="coerce")
    if pd.isna(parsed):
        return np.nan

    value = float(parsed)
    if value <= 0:
        return np.nan

    # Adding 5 before flooring turns "round down to ten" into "round to nearest ten".
    tens = np.floor((value + 5) / 10)
    return int(tens * 10)
|
| 83 |
+
|
| 84 |
+
|
| 85 |
+
def create_duration_label(minutes):
    """Return the text label for a duration bucket, or ``np.nan`` when it is missing."""
    return np.nan if pd.isna(minutes) else f"{int(minutes)}分钟档"
|
| 89 |
+
|
| 90 |
+
|
| 91 |
+
def create_empty_history_df() -> pd.DataFrame:
    """Build a zero-row history frame with the standard columns and dtypes."""
    # Columns not listed here are created with the generic "object" dtype.
    typed_columns = {
        "放映日期": "datetime64[ns]",
        "座位数": "int64",
        "总收入": "float64",
        "总人次": "int64",
        "场次": "int64",
        "影片时长(分钟)": "float64",
        "影片时长档位": "float64",
    }
    empty_columns = {
        name: pd.Series(dtype=typed_columns.get(name, "object"))
        for name in HISTORY_COLUMNS
    }
    return pd.DataFrame(empty_columns)
|
| 101 |
+
|
| 102 |
+
|
| 103 |
+
def normalize_time_value(value):
    """Convert a show-time value to a ``datetime.time`` truncated to the minute.

    Handles, in this order: missing values (returns ``None``), ``datetime`` and
    ``time`` objects, numbers in ``[0, 1)`` interpreted as a fraction of a day,
    and finally anything ``pd.to_datetime`` can parse from its string form.
    Unparseable input returns ``None``.
    """
    if pd.isna(value):
        return None

    if isinstance(value, (datetime, dt_time)):
        clock = value.time() if isinstance(value, datetime) else value
        return clock.replace(second=0, microsecond=0)

    # Going through a one-element Series lets numeric strings be coerced as well.
    as_number = pd.to_numeric(pd.Series([value]), errors="coerce").iloc[0]
    is_day_fraction = pd.notna(as_number) and 0 <= float(as_number) < 1
    if is_day_fraction:
        minutes_per_day = 24 * 60
        # The modulo wraps a fraction that rounds up to a full day back to 00:00.
        minute_of_day = int(round(float(as_number) * 24 * 60)) % minutes_per_day
        return (datetime.min + timedelta(minutes=minute_of_day)).time()

    timestamp = pd.to_datetime(str(value), errors="coerce")
    if pd.isna(timestamp):
        return None
    return timestamp.time().replace(second=0, microsecond=0)
|
| 120 |
+
|
| 121 |
+
|
| 122 |
+
def _format_id_value(value):
    """Render one identifier cell as stripped text, using "" for missing values."""
    if pd.api.types.is_scalar(value) and pd.isna(value):
        return ""
    if isinstance(value, (float, np.floating)) and float(value).is_integer():
        # A numeric id column containing blanks becomes float64, so 123 would
        # otherwise be rendered as "123.0" and never match the textual id "123".
        return str(int(value))
    return str(value).strip()


def _normalize_history_df(df: Optional[pd.DataFrame]) -> pd.DataFrame:
    """Coerce a history-like frame to the standard columns, types and ordering.

    Steps, in order:
      1. Add any missing ``HISTORY_COLUMNS`` as NaN columns.
      2. Drop rows without a movie title; fill a missing cleaned title from
         ``clean_movie_title``.
      3. Parse the show date (time of day removed) and show time (minute precision).
      4. Coerce counts to ``int`` and revenue to ``float``, with 0 for
         unparseable values.
      5. Drop rows whose duration is present but outside ``(0, 400]`` minutes,
         then recompute the duration bucket and label.
      6. Render text and id columns as stripped strings ("" when missing).
      7. Drop rows lacking a parseable date or time.
      8. De-duplicate, keeping the last occurrence: by ``showId`` where it is
         set, otherwise by title, date, time and hall.
      9. Sort by date, time, hall and title.

    Args:
        df: Frame to normalize. ``None`` or an empty frame yields the result of
            ``create_empty_history_df()``.

    Returns:
        A new frame containing exactly ``HISTORY_COLUMNS``, with a fresh
        0..n-1 index. The input frame is not modified.
    """
    if df is None or df.empty:
        return create_empty_history_df()

    normalized = df.copy()
    for column in HISTORY_COLUMNS:
        if column not in normalized.columns:
            normalized[column] = np.nan

    # Record missing titles before astype(str): None would otherwise turn into
    # the literal text "None" and slip past the blank/"nan" filter below.
    title_missing = normalized["影片名称"].isna()
    normalized["影片名称"] = normalized["影片名称"].astype(str).str.strip()
    has_title = (
        ~title_missing
        & normalized["影片名称"].ne("")
        & normalized["影片名称"].ne("nan")
    )
    normalized = normalized[has_title].copy()
    normalized["影片名称_清理后"] = normalized["影片名称_清理后"].where(
        normalized["影片名称_清理后"].notna(),
        normalized["影片名称"].apply(clean_movie_title),
    )
    normalized["影片名称_清理后"] = normalized["影片名称_清理后"].astype(str).str.strip()

    normalized["放映日期"] = pd.to_datetime(normalized["放映日期"], errors="coerce").dt.normalize()
    normalized["放映时间"] = normalized["放映时间"].apply(normalize_time_value)

    for column in ["座位数", "总人次", "场次"]:
        normalized[column] = (
            pd.to_numeric(normalized[column], errors="coerce").fillna(0).round().astype(int)
        )
    normalized["总收入"] = (
        pd.to_numeric(normalized["总收入"], errors="coerce").fillna(0.0).astype(float)
    )

    # An unknown duration is allowed; a known one must be positive and at most 400.
    normalized["影片时长(分钟)"] = pd.to_numeric(normalized["影片时长(分钟)"], errors="coerce")
    duration = normalized["影片时长(分钟)"]
    normalized = normalized[duration.isna() | ((duration > 0) & (duration <= 400))].copy()
    normalized["影片时长档位"] = normalized["影片时长(分钟)"].apply(round_minutes_to_10min)
    normalized["影片时长类型"] = normalized["影片时长档位"].apply(create_duration_label)

    for column in ["影厅", "影片语言", "影片制式"]:
        normalized[column] = normalized[column].fillna("").astype(str).str.strip()
    for column in ["showId", "影片编码"]:
        normalized[column] = normalized[column].apply(_format_id_value).astype(str)

    normalized = normalized.dropna(subset=["放映日期", "放映时间"]).copy()
    # Temporary text form of the time, used only for de-duplication and sorting.
    normalized["放映时间_str"] = normalized["放映时间"].apply(
        lambda value: value.strftime("%H:%M:%S") if isinstance(value, dt_time) else ""
    )

    with_show_id = normalized[normalized["showId"].ne("")].copy()
    without_show_id = normalized[normalized["showId"].eq("")].copy()

    if not with_show_id.empty:
        with_show_id = with_show_id.drop_duplicates(subset=["showId"], keep="last")
    if not without_show_id.empty:
        without_show_id = without_show_id.drop_duplicates(
            subset=["影片名称", "放映日期", "放映时间_str", "影厅"],
            keep="last",
        )

    normalized = pd.concat([with_show_id, without_show_id], ignore_index=True)
    normalized = normalized.sort_values(
        ["放映日期", "放映时间_str", "影厅", "影片名称"]
    ).reset_index(drop=True)
    # Selecting HISTORY_COLUMNS also discards the temporary sort key.
    return normalized[HISTORY_COLUMNS]
|
| 177 |
+
|
| 178 |
+
|
| 179 |
+
def load_history_df() -> pd.DataFrame:
    """Load the persisted session history as a normalized frame.

    Reads ``LOCAL_HISTORY_FILE`` when it exists. Otherwise, when
    ``LEGACY_HISTORY_FILE`` exists, that file is normalized, saved to the local
    history file via ``save_history_df`` and returned. With neither file
    present an empty history frame is returned.

    Returns:
        A normalized history frame. Any failure while reading or normalizing is
        logged and an empty history frame is returned instead of raising.
    """
    import logging

    logger = logging.getLogger(__name__)
    # Read identifier columns as text. Left to type inference, a column with
    # blanks becomes float64 (ids turn into "123.0" and stop matching on
    # de-duplication) and codes with leading zeros lose them.
    id_dtypes = {"showId": str, "影片编码": str}

    ensure_state_dir()

    if LOCAL_HISTORY_FILE.exists():
        try:
            return _normalize_history_df(pd.read_csv(LOCAL_HISTORY_FILE, dtype=id_dtypes))
        except Exception:
            # Best effort is kept, but the failure is no longer invisible.
            logger.exception(
                "Could not load history from %s; continuing with an empty history",
                LOCAL_HISTORY_FILE,
            )
            return create_empty_history_df()

    if LEGACY_HISTORY_FILE.exists():
        try:
            legacy_df = pd.read_csv(LEGACY_HISTORY_FILE, dtype=id_dtypes)
            history_df = _normalize_history_df(legacy_df)
            save_history_df(history_df)
            return history_df
        except Exception:
            logger.exception(
                "Could not import legacy history from %s; continuing with an empty history",
                LEGACY_HISTORY_FILE,
            )
            return create_empty_history_df()

    return create_empty_history_df()
|
| 198 |
+
|
| 199 |
+
|
| 200 |
+
def save_history_df(df: pd.DataFrame) -> pd.DataFrame:
    """Normalize ``df`` and persist it as the local history CSV.

    The CSV is written to a temporary sibling file and then moved over
    ``LOCAL_HISTORY_FILE`` with ``os.replace``, so an interrupted write cannot
    leave a truncated history file behind.

    Args:
        df: History rows to store; normalized with ``_normalize_history_df``.

    Returns:
        The normalized frame that was written.

    Raises:
        Exception: Whatever the write or the rename raises. The temporary file
            is removed before the exception propagates.
    """
    ensure_state_dir()
    normalized = _normalize_history_df(df)

    temp_file = LOCAL_HISTORY_FILE.with_name(LOCAL_HISTORY_FILE.name + ".tmp")
    try:
        normalized.to_csv(temp_file, index=False)
        os.replace(temp_file, LOCAL_HISTORY_FILE)
    except Exception:
        if temp_file.exists():
            temp_file.unlink()
        raise
    return normalized
|
| 205 |
+
|
| 206 |
+
|
| 207 |
+
def merge_history_df(existing_df: Optional[pd.DataFrame], new_df: Optional[pd.DataFrame]) -> pd.DataFrame:
    """Append ``new_df`` to ``existing_df``, then save and return the result.

    Frames that are ``None`` or empty are skipped. The combined rows go through
    ``save_history_df``, whose return value is handed back to the caller.
    """
    candidates = (existing_df, new_df)
    usable = [frame for frame in candidates if frame is not None and not frame.empty]
    if usable:
        combined = pd.concat(usable, ignore_index=True)
    else:
        combined = create_empty_history_df()
    return save_history_df(combined)
|
| 215 |
+
|
| 216 |
+
|
| 217 |
+
def prepare_manual_report_history_df(raw_df: pd.DataFrame) -> pd.DataFrame:
    """Turn a positionally-indexed report frame into a normalized history frame.

    Columns 0, 1, 2, 5, 6 and 7 of ``raw_df`` are renamed to the title, date,
    time, admissions, revenue and seat-count columns. Each row counts as one
    session. Rows missing a title, date or time are dropped. Fields the report
    does not carry are filled with "" or NaN before normalization.
    """
    if raw_df is None or raw_df.empty:
        return create_empty_history_df()

    positional_names = {
        0: "影片名称",
        1: "放映日期",
        2: "放映时间",
        5: "总人次",
        6: "总收入",
        7: "座位数",
    }
    key_columns = ["影片名称", "放映日期", "放映时间"]
    kept_columns = key_columns + ["座位数", "总收入", "总人次", "场次"]

    report = raw_df.copy()
    report["场次"] = 1
    report = report.rename(columns=positional_names)
    report = report[kept_columns]
    report = report.dropna(subset=key_columns)

    report["影片名称_清理后"] = report["影片名称"].apply(clean_movie_title)
    for text_column in ("影厅", "showId", "影片编码", "影片语言", "影片制式"):
        report[text_column] = ""
    for duration_column in ("影片时长(分钟)", "影片时长档位", "影片时长类型"):
        report[duration_column] = np.nan

    return _normalize_history_df(report)
|
| 247 |
+
|
| 248 |
+
|
| 249 |
+
def prepare_history_df_from_schedule(schedule_list: List[dict], show_date: str, hall_seat_map=None, token: Optional[str] = None) -> pd.DataFrame:
    """Convert schedule entries for one date into a normalized history frame.

    Args:
        schedule_list: Schedule entries (dicts). Entries without a
            ``movieName`` or ``showStartTime`` are skipped.
        show_date: Date stored as the show date of every row.
        hall_seat_map: Optional mapping of hall id to seat count. Keys are
            compared as strings and unknown halls get 0 seats.
        token: When given, canonical movie names are fetched with it and used
            to clean the titles.

    Returns:
        The normalized history frame, or an empty history frame when
        ``schedule_list`` is empty.
    """
    if not schedule_list:
        return create_empty_history_df()

    seats_by_hall = {str(hall): seats for hall, seats in (hall_seat_map or {}).items()}
    canonical_names = fetch_canonical_movie_names(token, show_date) if token else []

    def text_field(entry, key):
        # Missing or falsy values become "", everything else stripped text.
        return str(entry.get(key) or "").strip()

    records = []
    for entry in schedule_list:
        title = entry.get("movieName")
        starts_at = entry.get("showStartTime")
        if not (title and starts_at):
            continue

        length_minutes = pd.to_numeric(entry.get("movieLength"), errors="coerce")
        length_bucket = round_minutes_to_10min(length_minutes)
        cleaned_title = clean_movie_title(title, canonical_names or None)
        hall_key = text_field(entry, "hallId")

        record = {
            "showId": text_field(entry, "showId"),
            # The cleaned title is stored in both title columns.
            "影片名称": cleaned_title,
            "影片名称_清理后": cleaned_title,
            "放映日期": show_date,
            "放映时间": starts_at,
            "影厅": entry.get("hallName") or "",
            "座位数": seats_by_hall.get(hall_key, 0),
            "总收入": pd.to_numeric(entry.get("soldBoxOffice"), errors="coerce"),
            "总人次": pd.to_numeric(entry.get("soldTicketNum"), errors="coerce"),
            "场次": 1,
            "影片时长(分钟)": length_minutes,
            "影片时长档位": length_bucket,
            "影片时长类型": create_duration_label(length_bucket),
            "影片编码": text_field(entry, "movieNum"),
            "影片语言": text_field(entry, "movieLanguage"),
            "影片制式": text_field(entry, "movieMediaType"),
        }
        records.append(record)

    return _normalize_history_df(pd.DataFrame(records))
|
| 288 |
+
|
| 289 |
+
|
| 290 |
+
def get_available_date_set(df: Optional[pd.DataFrame]) -> set:
    """Return the distinct show dates in ``df`` as ``datetime.date`` objects.

    Values that cannot be parsed as dates are ignored. A missing frame, an
    empty frame or a frame without the show-date column yields an empty set.
    """
    has_dates = df is not None and not df.empty and "放映日期" in df.columns
    if not has_dates:
        return set()

    timestamps = pd.to_datetime(df["放映日期"], errors="coerce").dropna()
    return set(map(lambda stamp: stamp.date(), timestamps))
|
| 294 |
+
|
| 295 |
+
|
| 296 |
+
def find_missing_dates(df: Optional[pd.DataFrame], start_date: date, end_date: date) -> List[date]:
    """List the days in ``[start_date, end_date]`` that have no rows in ``df``.

    Both bounds are inclusive and the result is in ascending order. An inverted
    range returns an empty list.
    """
    if start_date > end_date:
        return []

    covered = get_available_date_set(df)
    day_count = (end_date - start_date).days + 1
    every_day = (start_date + timedelta(days=offset) for offset in range(day_count))
    return [day for day in every_day if day not in covered]
|
| 307 |
+
|
| 308 |
+
|
| 309 |
+
def build_duration_reference_from_history(df: Optional[pd.DataFrame]) -> pd.DataFrame:
    """Summarize the durations recorded for each cleaned movie title.

    Rows without a cleaned title or a numeric duration are ignored. Durations
    are rounded to whole minutes, then grouped by cleaned title, duration,
    bucket and label. Each group reports the first raw title seen and the sum
    of its session counts.

    Returns:
        A frame with the columns 影片, 影片名称_清理后, 影片时长(分钟),
        影片时长档位, 影片时长类型 and 记录场次, sorted by cleaned title and
        duration. It has no rows when there is nothing to summarize.
    """
    output_columns = ["影片", "影片名称_清理后", "影片时长(分钟)", "影片时长档位", "影片时长类型", "记录场次"]
    group_keys = ["影片名称_清理后", "影片时长(分钟)", "影片时长档位", "影片时长类型"]

    if df is None or df.empty or "影片时长(分钟)" not in df.columns:
        return pd.DataFrame(columns=output_columns)

    durations = df.copy()
    durations["影片时长(分钟)"] = pd.to_numeric(durations["影片时长(分钟)"], errors="coerce")
    durations = durations.dropna(subset=["影片名称_清理后", "影片时长(分钟)"]).copy()
    if durations.empty:
        return pd.DataFrame(columns=output_columns)

    whole_minutes = durations["影片时长(分钟)"].round().astype(int)
    durations["影片时长(分钟)"] = whole_minutes
    durations["影片时长档位"] = durations["影片时长(分钟)"].apply(round_minutes_to_10min)
    durations["影片时长类型"] = durations["影片时长档位"].apply(create_duration_label)
    durations["影片"] = durations["影片名称"]

    grouped = durations.groupby(group_keys, as_index=False).agg(
        影片=("影片", "first"),
        记录场次=("场次", "sum"),
    )
    ordered = grouped.sort_values(["影片名称_清理后", "影片时长(分钟)"]).reset_index(drop=True)
    return ordered[output_columns]
|
| 335 |
+
|
| 336 |
+
|
| 337 |
+
def summarize_total_box_office_by_movies(df: Optional[pd.DataFrame], movie_names: Iterable[str]) -> pd.DataFrame:
    """Total the revenue in ``df`` for each requested cleaned movie title.

    Requested names are stringified and stripped, and blank ones are dropped.
    Titles with no rows in ``df`` get a total of 0.0.

    Returns:
        A frame with the columns 影片 and 总票房. With a populated ``df`` the
        rows are sorted by total (descending) and then title. With a missing or
        empty ``df`` they keep the requested order. With no usable names the
        frame has no rows.
    """
    wanted = []
    for candidate in (movie_names or []):
        label = str(candidate).strip()
        if label:
            wanted.append(label)

    if not wanted:
        return pd.DataFrame(columns=["影片", "总票房"])

    if df is None or df.empty:
        return pd.DataFrame({"影片": wanted, "总票房": [0.0] * len(wanted)})

    revenue = pd.to_numeric(df["总收入"], errors="coerce").fillna(0.0)
    history = df.assign(**{"总收入": revenue})
    revenue_by_title = history.groupby("影片名称_清理后")["总收入"].sum().to_dict()

    totals = [float(revenue_by_title.get(title, 0.0)) for title in wanted]
    summary = pd.DataFrame({"影片": wanted, "总票房": totals})
    summary = summary.sort_values(["总票房", "影片"], ascending=[False, True])
    return summary.reset_index(drop=True)
|
| 355 |
+
|
| 356 |
+
|
| 357 |
+
def default_history_manifest() -> dict:
    """Return a new manifest dict with every known key set to its empty value."""
    manifest = dict(
        synced_dates=[],
        updated_at="",
        last_successful_target_date="",
    )
    return manifest
|
| 363 |
+
|
| 364 |
+
|
| 365 |
+
def load_history_manifest() -> dict:
    """Load the sync manifest, falling back to defaults for anything unusable.

    Starts from ``default_history_manifest()`` and overlays the JSON object
    stored in ``LOCAL_HISTORY_MANIFEST_FILE`` when that file exists and can be
    parsed. A read or parse failure is logged and the defaults are kept; a
    payload that is not a JSON object is ignored.

    Returns:
        The manifest dict. ``synced_dates`` is always a sorted list of unique,
        non-blank, stripped strings.
    """
    import logging

    ensure_state_dir()
    manifest = default_history_manifest()

    if LOCAL_HISTORY_MANIFEST_FILE.exists():
        try:
            payload = json.loads(LOCAL_HISTORY_MANIFEST_FILE.read_text(encoding="utf-8"))
        except Exception:
            # Still best effort, but a corrupt manifest is now visible in the logs.
            logging.getLogger(__name__).exception(
                "Could not read manifest %s; using defaults",
                LOCAL_HISTORY_MANIFEST_FILE,
            )
        else:
            if isinstance(payload, dict):
                manifest.update(payload)

    synced_dates = manifest.get("synced_dates", [])
    if not isinstance(synced_dates, list):
        synced_dates = []
    manifest["synced_dates"] = sorted({str(item).strip() for item in synced_dates if str(item).strip()})
    return manifest
|
| 382 |
+
|
| 383 |
+
|
| 384 |
+
def save_history_manifest(manifest: Optional[dict]) -> dict:
    """Write the sync manifest to disk and return what was written.

    The given values are overlaid on ``default_history_manifest()`` and
    ``synced_dates`` is reduced to a sorted list of unique, non-blank, stripped
    strings. The JSON is written to a temporary sibling file and then moved
    over ``LOCAL_HISTORY_MANIFEST_FILE`` with ``os.replace``, so an interrupted
    write cannot leave a half-written manifest behind.

    Args:
        manifest: Values to store; anything that is not a dict is ignored.

    Returns:
        The manifest dict that was written.

    Raises:
        Exception: Whatever serialization, the write or the rename raises. The
            temporary file is removed before the exception propagates.
    """
    ensure_state_dir()
    final_manifest = default_history_manifest()
    if isinstance(manifest, dict):
        final_manifest.update(manifest)

    synced_dates = final_manifest.get("synced_dates", [])
    if not isinstance(synced_dates, list):
        synced_dates = []
    final_manifest["synced_dates"] = sorted({str(item).strip() for item in synced_dates if str(item).strip()})

    # Serialize before touching the disk so a failure here writes nothing.
    serialized = json.dumps(final_manifest, ensure_ascii=False, indent=2)
    temp_file = LOCAL_HISTORY_MANIFEST_FILE.with_name(LOCAL_HISTORY_MANIFEST_FILE.name + ".tmp")
    try:
        temp_file.write_text(serialized, encoding="utf-8")
        os.replace(temp_file, LOCAL_HISTORY_MANIFEST_FILE)
    except Exception:
        if temp_file.exists():
            temp_file.unlink()
        raise
    return final_manifest
|
| 400 |
+
|
| 401 |
+
|
| 402 |
+
def get_synced_date_set(manifest: Optional[dict]) -> set:
    """Return the manifest's ``synced_dates`` as a set of non-blank, stripped strings.

    Anything other than a dict holding a list under ``synced_dates`` yields an
    empty set.
    """
    entries = manifest.get("synced_dates", []) if isinstance(manifest, dict) else None
    if not isinstance(entries, list):
        return set()

    stripped = (str(entry).strip() for entry in entries)
    return {text for text in stripped if text}
|