hengdian

Running

App Files Files Community

Ethscriptions committed 17 days ago

Commit

daca5f3

verified ·

1 Parent(s): 8c4699d

Upload historical_sessions.py

Browse files

Files changed (1) hide show

historical_sessions.py +408 -0

historical_sessions.py ADDED Viewed

	@@ -0,0 +1,408 @@

+import json
+import os
+from datetime import date, datetime, time as dt_time, timedelta
+from pathlib import Path
+from typing import Iterable, List, Optional
+import numpy as np
+import pandas as pd
+from cinema_api_client import fetch_canonical_movie_names
# Filesystem layout: every path is resolved relative to the directory holding this module.
ROOT_DIR = Path(__file__).resolve().parent
STATE_DIR = ROOT_DIR.joinpath("cinema_cache")
LOCAL_HISTORY_FILE = STATE_DIR.joinpath("historical_sessions.csv")
LOCAL_HISTORY_MANIFEST_FILE = STATE_DIR.joinpath("historical_sessions_manifest.json")
LEGACY_HISTORY_FILE = ROOT_DIR.joinpath("persistent_data.csv")

# Column names, in output order, of the history table handled by this module.
HISTORY_COLUMNS = [
    # identity of a screening
    "showId", "影片名称", "影片名称_清理后", "放映日期", "放映时间", "影厅",
    # capacity and sales figures
    "座位数", "总收入", "总人次", "场次",
    # running time and its derived bucket/label
    "影片时长(分钟)", "影片时长档位", "影片时长类型",
    # film metadata
    "影片编码", "影片语言", "影片制式",
]
def ensure_state_dir() -> None:
    """Create the cache directory, including missing parents, if it is absent."""
    os.makedirs(STATE_DIR, exist_ok=True)
def clean_movie_title(raw_title, canonical_names=None):
    """Reduce a raw screening title to ``<base name><format suffix>``.

    Args:
        raw_title: Title as reported by the data source. Non-string values
            are returned unchanged.
        canonical_names: Optional collection of known film names. The longest
            one that occurs inside ``raw_title`` becomes the base name.

    Returns:
        The base name, followed by a full-width parenthesised format suffix
        when a format keyword (HDR LED, CINITY, Dolby, IMAX, giant screen,
        3D) is found in the title and the suffix is not already part of the
        base name.
    """
    if not isinstance(raw_title, str):
        return raw_title

    base_name = None
    if canonical_names:
        # Longest first, so a name that contains another name wins over the shorter one.
        for name in sorted(canonical_names, key=len, reverse=True):
            if name in raw_title:
                base_name = name
                break
    if not base_name:
        # Strip before splitting: with leading whitespace the first
        # space-separated token is empty, which used to yield an empty base name.
        base_name = raw_title.strip().split(" ", 1)[0]

    raw_upper = raw_title.upper()
    suffix = ""
    # Order matters: the first matching format keyword decides the suffix.
    if "HDR LED" in raw_upper:
        suffix = "（HDR LED）"
    elif "CINITY" in raw_upper:
        suffix = "（CINITY）"
    elif "杜比" in raw_upper or "DOLBY" in raw_upper:
        suffix = "（杜比视界）"
    elif "IMAX" in raw_upper:
        suffix = "（数字IMAX3D）" if "3D" in raw_upper else "（数字IMAX）"
    elif "巨幕" in raw_upper:
        suffix = "（中国巨幕立体）" if "立体" in raw_upper else "（中国巨幕）"
    elif "3D" in raw_upper:
        suffix = "（数字3D）"

    if suffix and suffix not in base_name:
        return f"{base_name}{suffix}"
    return base_name
def round_minutes_to_10min(minutes):
    """Round a running time to the nearest multiple of ten minutes (halves round up).

    Returns ``np.nan`` when the input is not numeric, is missing, or is not
    strictly positive; otherwise returns a Python ``int``.
    """
    parsed = pd.to_numeric(minutes, errors="coerce")
    if pd.isna(parsed):
        return np.nan
    value = float(parsed)
    if value <= 0:
        return np.nan
    # Adding 5 before flooring to a multiple of 10 rounds x5 up to the next bucket.
    tens = np.floor((value + 5) / 10)
    return int(tens * 10)
def create_duration_label(minutes):
    """Return the text label for a duration bucket, or ``np.nan`` when it is missing."""
    return np.nan if pd.isna(minutes) else f"{int(minutes)}分钟档"
def create_empty_history_df() -> pd.DataFrame:
    """Build a zero-row history frame with every history column and its dtype."""
    # Columns not listed here are created with dtype "object".
    typed_columns = {
        "放映日期": "datetime64[ns]",
        "座位数": "int64",
        "总收入": "float64",
        "总人次": "int64",
        "场次": "int64",
        "影片时长(分钟)": "float64",
        "影片时长档位": "float64",
    }
    return pd.DataFrame(
        {
            name: pd.Series(dtype=typed_columns.get(name, "object"))
            for name in HISTORY_COLUMNS
        }
    )
def normalize_time_value(value):
    """Coerce a show-time value to a ``datetime.time`` truncated to the minute.

    Handles, in this order: missing values, ``datetime``/``time`` objects,
    numbers in ``[0, 1)`` read as a fraction of a day (presumably a
    spreadsheet time serial; confirm against the data source), and anything
    ``pd.to_datetime`` can parse from its string form.

    Returns:
        A ``datetime.time`` with seconds and microseconds zeroed, or ``None``
        when the value is missing or cannot be parsed.
    """
    if pd.isna(value):
        return None

    if isinstance(value, datetime):
        value = value.time()
    if isinstance(value, dt_time):
        return value.replace(second=0, microsecond=0)

    as_number = pd.to_numeric(pd.Series([value]), errors="coerce").iloc[0]
    if pd.notna(as_number) and 0 <= float(as_number) < 1:
        # The modulo wraps a value that rounds up to 24:00 back to 00:00.
        minute_of_day = int(round(float(as_number) * 24 * 60)) % (24 * 60)
        return dt_time(minute_of_day // 60, minute_of_day % 60)

    stamp = pd.to_datetime(str(value), errors="coerce")
    return None if pd.isna(stamp) else stamp.time().replace(second=0, microsecond=0)
def _normalize_history_df(df: Optional[pd.DataFrame]) -> pd.DataFrame:
    """Bring a history frame into canonical shape: columns, dtypes, dedupe, order.

    Steps, in order: add missing history columns; drop rows without a title;
    fill missing cleaned titles; parse dates and times; coerce numeric
    columns; drop rows whose duration is present but outside ``(0, 400]``
    minutes; rebuild the duration bucket and label; normalise text columns;
    drop rows without a date or time; de-duplicate (by ``showId`` when
    present, otherwise by title/date/time/hall, keeping the last row); sort.

    Returns:
        A new frame restricted to ``HISTORY_COLUMNS``. ``None`` or an empty
        input yields the empty history frame.
    """
    if df is None or df.empty:
        return create_empty_history_df()

    frame = df.copy()
    for name in HISTORY_COLUMNS:
        if name not in frame.columns:
            frame[name] = np.nan

    # Titles: NaN becomes the string "nan" after astype(str), so it is filtered by value.
    frame["影片名称"] = frame["影片名称"].astype(str).str.strip()
    has_title = frame["影片名称"].ne("") & frame["影片名称"].ne("nan")
    frame = frame[has_title].copy()

    existing_clean = frame["影片名称_清理后"]
    derived_clean = frame["影片名称"].apply(clean_movie_title)
    frame["影片名称_清理后"] = existing_clean.where(existing_clean.notna(), derived_clean)
    frame["影片名称_清理后"] = frame["影片名称_清理后"].astype(str).str.strip()

    # Date is reduced to midnight; time is reduced to minute precision.
    frame["放映日期"] = pd.to_datetime(frame["放映日期"], errors="coerce").dt.normalize()
    frame["放映时间"] = frame["放映时间"].apply(normalize_time_value)

    for name in ("座位数", "总人次", "场次"):
        as_number = pd.to_numeric(frame[name], errors="coerce")
        frame[name] = as_number.fillna(0).round().astype(int)
    revenue = pd.to_numeric(frame["总收入"], errors="coerce")
    frame["总收入"] = revenue.fillna(0.0).astype(float)

    # Unknown durations are kept; known ones must lie in (0, 400] minutes.
    frame["影片时长(分钟)"] = pd.to_numeric(frame["影片时长(分钟)"], errors="coerce")
    minutes = frame["影片时长(分钟)"]
    plausible = minutes.isna() | ((minutes > 0) & (minutes <= 400))
    frame = frame[plausible].copy()
    frame["影片时长档位"] = frame["影片时长(分钟)"].apply(round_minutes_to_10min)
    frame["影片时长类型"] = frame["影片时长档位"].apply(create_duration_label)

    for name in ("影厅", "showId", "影片编码", "影片语言", "影片制式"):
        frame[name] = frame[name].fillna("").astype(str).str.strip()

    frame = frame.dropna(subset=["放映日期", "放映时间"]).copy()

    def _clock_text(value):
        return value.strftime("%H:%M:%S") if isinstance(value, dt_time) else ""

    # Temporary string key so times can be used for de-duplication and sorting.
    frame["放映时间_str"] = frame["放映时间"].apply(_clock_text)

    keyed = frame[frame["showId"].ne("")].copy()
    unkeyed = frame[frame["showId"].eq("")].copy()
    if not keyed.empty:
        keyed = keyed.drop_duplicates(subset=["showId"], keep="last")
    if not unkeyed.empty:
        unkeyed = unkeyed.drop_duplicates(
            subset=["影片名称", "放映日期", "放映时间_str", "影厅"],
            keep="last",
        )

    combined = pd.concat([keyed, unkeyed], ignore_index=True)
    combined = combined.sort_values(["放映日期", "放映时间_str", "影厅", "影片名称"])
    combined = combined.reset_index(drop=True)
    combined = combined.drop(columns=["放映时间_str"])
    return combined[HISTORY_COLUMNS]
def load_history_df() -> pd.DataFrame:
    """Load the stored session history, migrating the legacy file if needed.

    The local history CSV is preferred. When it does not exist but the legacy
    CSV does, the legacy data is normalised, saved to the local history file
    and returned. Any failure while reading or normalising is logged with its
    traceback and an empty history frame is returned, so callers never see an
    exception from this function.

    Returns:
        The normalised history frame, or the empty history frame.
    """
    # Function-scope import: this module does not import logging at file level.
    import logging

    logger = logging.getLogger(__name__)
    ensure_state_dir()

    if LOCAL_HISTORY_FILE.exists():
        try:
            return _normalize_history_df(pd.read_csv(LOCAL_HISTORY_FILE))
        except Exception:
            # Best-effort fallback is kept, but the failure is no longer silent:
            # an unreadable file would otherwise look like "no history".
            logger.exception(
                "Failed to load history file %s; returning empty history",
                LOCAL_HISTORY_FILE,
            )
            return create_empty_history_df()

    if LEGACY_HISTORY_FILE.exists():
        try:
            legacy_df = pd.read_csv(LEGACY_HISTORY_FILE)
            history_df = _normalize_history_df(legacy_df)
            save_history_df(history_df)
            return history_df
        except Exception:
            logger.exception(
                "Failed to migrate legacy history file %s; returning empty history",
                LEGACY_HISTORY_FILE,
            )
            return create_empty_history_df()

    return create_empty_history_df()
def save_history_df(df: pd.DataFrame) -> pd.DataFrame:
    """Normalise ``df`` and persist it as the local history CSV.

    The CSV is written to a temporary sibling file and then moved over the
    target with ``os.replace``, so an interrupted write cannot leave a
    truncated history file behind.

    Args:
        df: History rows to store; normalised before writing.

    Returns:
        The normalised frame that was written.
    """
    ensure_state_dir()
    normalized = _normalize_history_df(df)
    tmp_path = LOCAL_HISTORY_FILE.with_name(LOCAL_HISTORY_FILE.name + ".tmp")
    try:
        normalized.to_csv(tmp_path, index=False)
        os.replace(tmp_path, LOCAL_HISTORY_FILE)
    finally:
        # Only present here if writing or replacing failed part-way.
        if tmp_path.exists():
            tmp_path.unlink()
    return normalized
def merge_history_df(existing_df: Optional[pd.DataFrame], new_df: Optional[pd.DataFrame]) -> pd.DataFrame:
    """Append ``new_df`` to ``existing_df``, save the result and return it.

    ``None`` or empty inputs are skipped. Rows are concatenated with existing
    rows first; normalisation and de-duplication happen inside
    ``save_history_df``.
    """
    non_empty = [
        frame
        for frame in (existing_df, new_df)
        if frame is not None and not frame.empty
    ]
    if non_empty:
        combined = pd.concat(non_empty, ignore_index=True)
    else:
        combined = create_empty_history_df()
    return save_history_df(combined)
def prepare_manual_report_history_df(raw_df: pd.DataFrame) -> pd.DataFrame:
    """Convert a positionally-indexed manual report into a normalised history frame.

    Args:
        raw_df: Report whose columns are labelled by integer position. Columns
            0, 1, 2, 5, 6 and 7 are read as title, show date, show time,
            admissions, revenue and seat count respectively.

    Returns:
        The normalised history frame; the empty history frame when ``raw_df``
        is ``None`` or empty.

    Raises:
        KeyError: If one of the expected columns is missing from ``raw_df``.
    """
    if raw_df is None or raw_df.empty:
        return create_empty_history_df()

    prepared = raw_df.copy()
    prepared["场次"] = 1  # each report row counts as one screening
    prepared = prepared.rename(
        columns={
            0: "影片名称",
            1: "放映日期",
            2: "放映时间",
            5: "总人次",
            6: "总收入",
            7: "座位数",
        }
    )
    required_cols = ["影片名称", "放映日期", "放映时间", "座位数", "总收入", "总人次", "场次"]
    # Explicit copy instead of in-place edits on a column selection, which is
    # the chained-assignment pattern pandas warns about.
    prepared = (
        prepared[required_cols]
        .dropna(subset=["影片名称", "放映日期", "放映时间"])
        .copy()
    )
    prepared["影片名称_清理后"] = prepared["影片名称"].apply(clean_movie_title)
    # The manual report carries none of these fields; fill with blanks/NaN.
    for text_column in ("影厅", "showId", "影片编码", "影片语言", "影片制式"):
        prepared[text_column] = ""
    for missing_column in ("影片时长(分钟)", "影片时长档位", "影片时长类型"):
        prepared[missing_column] = np.nan
    return _normalize_history_df(prepared)
def prepare_history_df_from_schedule(schedule_list: List[dict], show_date: str, hall_seat_map=None, token: Optional[str] = None) -> pd.DataFrame:
    """Turn schedule entries for one date into a normalised history frame.

    Args:
        schedule_list: Schedule entries as dicts; entries without a
            ``movieName`` or ``showStartTime`` are skipped.
        show_date: Date applied to every row.
        hall_seat_map: Optional mapping from hall id to seat count; keys are
            compared as strings and unknown halls get 0 seats.
        token: When truthy, canonical film names are fetched with it and used
            for title cleaning.

    Returns:
        The normalised history frame; the empty history frame for an empty
        schedule.
    """
    if not schedule_list:
        return create_empty_history_df()

    seats_by_hall = {str(hall): seats for hall, seats in (hall_seat_map or {}).items()}
    canonical_names = fetch_canonical_movie_names(token, show_date) if token else []
    known_titles = canonical_names or None

    def _text(entry, key):
        return str(entry.get(key) or "").strip()

    records = []
    for entry in schedule_list:
        title = entry.get("movieName")
        starts_at = entry.get("showStartTime")
        if not (title and starts_at):
            continue

        length = pd.to_numeric(entry.get("movieLength"), errors="coerce")
        bucket = round_minutes_to_10min(length)
        display_name = clean_movie_title(title, known_titles)
        hall_key = _text(entry, "hallId")

        records.append(
            {
                "showId": _text(entry, "showId"),
                # Both title columns receive the cleaned name.
                "影片名称": display_name,
                "影片名称_清理后": display_name,
                "放映日期": show_date,
                "放映时间": starts_at,
                "影厅": entry.get("hallName") or "",
                "座位数": seats_by_hall.get(hall_key, 0),
                "总收入": pd.to_numeric(entry.get("soldBoxOffice"), errors="coerce"),
                "总人次": pd.to_numeric(entry.get("soldTicketNum"), errors="coerce"),
                "场次": 1,
                "影片时长(分钟)": length,
                "影片时长档位": bucket,
                "影片时长类型": create_duration_label(bucket),
                "影片编码": _text(entry, "movieNum"),
                "影片语言": _text(entry, "movieLanguage"),
                "影片制式": _text(entry, "movieMediaType"),
            }
        )
    return _normalize_history_df(pd.DataFrame(records))
def get_available_date_set(df: Optional[pd.DataFrame]) -> set:
    """Return the distinct calendar dates found in the show-date column.

    Values that cannot be parsed as dates are ignored. ``None``, an empty
    frame, or a frame without the column yields an empty set.
    """
    found = set()
    if df is None or df.empty or "放映日期" not in df.columns:
        return found
    parsed = pd.to_datetime(df["放映日期"], errors="coerce").dropna()
    for stamp in parsed:
        found.add(stamp.date())
    return found
def find_missing_dates(df: Optional[pd.DataFrame], start_date: date, end_date: date) -> List[date]:
    """List the days in ``[start_date, end_date]`` that have no rows in ``df``.

    Both bounds are inclusive and the result is in ascending order. An
    inverted range returns an empty list.
    """
    if start_date > end_date:
        return []
    covered = get_available_date_set(df)
    span_days = (end_date - start_date).days
    candidates = (start_date + timedelta(days=offset) for offset in range(span_days + 1))
    return [day for day in candidates if day not in covered]
def build_duration_reference_from_history(df: Optional[pd.DataFrame]) -> pd.DataFrame:
    """Summarise which running times were recorded for each cleaned title.

    Rows without a cleaned title or a numeric duration are ignored. Durations
    are rounded to whole minutes, and the bucket and label are recomputed
    from them. The result has one row per (cleaned title, duration)
    combination, with the first raw title seen and the summed screening
    count, sorted by cleaned title and duration.

    Returns:
        A frame with the six reference columns; it has no rows when there is
        nothing to summarise.
    """
    output_columns = ["影片", "影片名称_清理后", "影片时长(分钟)", "影片时长档位", "影片时长类型", "记录场次"]
    if df is None or df.empty or "影片时长(分钟)" not in df.columns:
        return pd.DataFrame(columns=output_columns)

    work = df.copy()
    work["影片时长(分钟)"] = pd.to_numeric(work["影片时长(分钟)"], errors="coerce")
    work = work.dropna(subset=["影片名称_清理后", "影片时长(分钟)"]).copy()
    if work.empty:
        return pd.DataFrame(columns=output_columns)

    work["影片时长(分钟)"] = work["影片时长(分钟)"].round().astype(int)
    work["影片时长档位"] = work["影片时长(分钟)"].apply(round_minutes_to_10min)
    work["影片时长类型"] = work["影片时长档位"].apply(create_duration_label)
    work["影片"] = work["影片名称"]

    group_keys = ["影片名称_清理后", "影片时长(分钟)", "影片时长档位", "影片时长类型"]
    grouped = work.groupby(group_keys, as_index=False)
    summary = grouped.agg(影片=("影片", "first"), 记录场次=("场次", "sum"))
    summary = summary.sort_values(["影片名称_清理后", "影片时长(分钟)"])
    summary = summary.reset_index(drop=True)
    return summary[output_columns]
def summarize_total_box_office_by_movies(df: Optional[pd.DataFrame], movie_names: Iterable[str]) -> pd.DataFrame:
    """Total the revenue recorded in ``df`` for each requested film.

    Args:
        df: History rows with a cleaned-title column and a revenue column.
            ``None`` or an empty frame means every film totals 0.0.
        movie_names: Film names to report, matched against the cleaned title.
            Any iterable is accepted, including a pandas Series or NumPy
            array; ``None`` is treated as empty. Names are stripped and
            blank ones dropped.

    Returns:
        A frame with the film name and its total revenue as ``float``. When
        ``df`` has rows the result is sorted by total descending, then name
        ascending; otherwise rows keep the requested order.
    """
    # `movie_names or []` raised "truth value is ambiguous" for Series/ndarray
    # inputs, so missing input is tested explicitly instead.
    candidates = [] if movie_names is None else movie_names
    requested_names = [str(name).strip() for name in candidates if str(name).strip()]
    if not requested_names:
        return pd.DataFrame(columns=["影片", "总票房"])
    if df is None or df.empty:
        return pd.DataFrame({"影片": requested_names, "总票房": [0.0] * len(requested_names)})

    history_df = df.copy()
    # Non-numeric or missing revenue counts as zero.
    history_df["总收入"] = pd.to_numeric(history_df["总收入"], errors="coerce").fillna(0.0)
    totals = history_df.groupby("影片名称_清理后")["总收入"].sum().to_dict()
    output_df = pd.DataFrame(
        {
            "影片": requested_names,
            "总票房": [float(totals.get(name, 0.0)) for name in requested_names],
        }
    )
    return output_df.sort_values(["总票房", "影片"], ascending=[False, True]).reset_index(drop=True)
def default_history_manifest() -> dict:
    """Return a fresh manifest with no synced dates and blank timestamps."""
    return dict(
        synced_dates=[],
        updated_at="",
        last_successful_target_date="",
    )
def load_history_manifest() -> dict:
    """Read the sync manifest from disk, falling back to defaults.

    Keys from the stored JSON object override the defaults. A missing file, an
    unreadable file, invalid JSON, or a non-object payload leaves the defaults
    in place; read and parse failures are logged instead of being discarded
    silently.

    Returns:
        The manifest dict. ``synced_dates`` is always a sorted list of unique,
        stripped, non-empty strings.
    """
    # Function-scope import: this module does not import logging at file level.
    import logging

    ensure_state_dir()
    manifest = default_history_manifest()
    if LOCAL_HISTORY_MANIFEST_FILE.exists():
        try:
            payload = json.loads(LOCAL_HISTORY_MANIFEST_FILE.read_text(encoding="utf-8"))
        except Exception:
            # Still best-effort (defaults are used), but leave a trace.
            logging.getLogger(__name__).exception(
                "Failed to read history manifest %s; using defaults",
                LOCAL_HISTORY_MANIFEST_FILE,
            )
        else:
            if isinstance(payload, dict):
                manifest.update(payload)

    synced_dates = manifest.get("synced_dates", [])
    if not isinstance(synced_dates, list):
        synced_dates = []
    manifest["synced_dates"] = sorted({str(item).strip() for item in synced_dates if str(item).strip()})
    return manifest
def save_history_manifest(manifest: Optional[dict]) -> dict:
    """Merge ``manifest`` over the defaults, normalise it and write it to disk.

    The JSON is written to a temporary sibling file and then moved over the
    target with ``os.replace``, so an interrupted write cannot leave a
    half-written manifest behind.

    Args:
        manifest: Values to store; anything that is not a dict is ignored and
            the defaults are written.

    Returns:
        The manifest that was written. ``synced_dates`` is a sorted list of
        unique, stripped, non-empty strings.
    """
    ensure_state_dir()
    final_manifest = default_history_manifest()
    if isinstance(manifest, dict):
        final_manifest.update(manifest)

    synced_dates = final_manifest.get("synced_dates", [])
    if not isinstance(synced_dates, list):
        synced_dates = []
    final_manifest["synced_dates"] = sorted({str(item).strip() for item in synced_dates if str(item).strip()})

    # Serialise first so a non-serialisable value fails before any file is touched.
    payload = json.dumps(final_manifest, ensure_ascii=False, indent=2)
    tmp_path = LOCAL_HISTORY_MANIFEST_FILE.with_name(LOCAL_HISTORY_MANIFEST_FILE.name + ".tmp")
    try:
        tmp_path.write_text(payload, encoding="utf-8")
        os.replace(tmp_path, LOCAL_HISTORY_MANIFEST_FILE)
    finally:
        # Only present here if writing or replacing failed part-way.
        if tmp_path.exists():
            tmp_path.unlink()
    return final_manifest
def get_synced_date_set(manifest: Optional[dict]) -> set:
    """Return the manifest's synced dates as a set of stripped, non-empty strings.

    A non-dict manifest, or a ``synced_dates`` value that is not a list,
    yields an empty set. Entries are converted with ``str`` before stripping.
    """
    entries = manifest.get("synced_dates", []) if isinstance(manifest, dict) else None
    if not isinstance(entries, list):
        return set()
    stripped = (str(entry).strip() for entry in entries)
    return {text for text in stripped if text}