Spaces:

Rsnarsna
/

transcript

Sleeping

App Files Files Community

rsnarsna committed about 1 month ago

Commit

dda4ec3

0 Parent(s):

first commit

Browse files

Files changed (6) hide show

.gitignore +220 -0
client_secret.json +12 -0
fastapi_app.py +1138 -0
gemini_transcript.py +445 -0
index.html +378 -0
requirements.txt +7 -0

.gitignore ADDED Viewed

	@@ -0,0 +1,220 @@

+# Byte-compiled / optimized / DLL files
+__pycache__/
+*.py[codz]
+*$py.class
+# C extensions
+*.so
+# Distribution / packaging
+.Python
+build/
+develop-eggs/
+dist/
+downloads/
+eggs/
+.eggs/
+lib/
+lib64/
+parts/
+sdist/
+var/
+wheels/
+share/python-wheels/
+*.egg-info/
+.installed.cfg
+*.egg
+MANIFEST
+# PyInstaller
+#   Usually these files are written by a python script from a template
+#   before PyInstaller builds the exe, so as to inject date/other infos into it.
+*.manifest
+*.spec
+# Installer logs
+pip-log.txt
+pip-delete-this-directory.txt
+# Unit test / coverage reports
+htmlcov/
+.tox/
+.nox/
+.coverage
+.coverage.*
+.cache
+nosetests.xml
+coverage.xml
+*.cover
+*.py.cover
+*.lcov
+.hypothesis/
+.pytest_cache/
+cover/
+# Translations
+*.mo
+*.pot
+# Django stuff:
+*.log
+local_settings.py
+db.sqlite3
+db.sqlite3-journal
+# Flask stuff:
+instance/
+.webassets-cache
+# Scrapy stuff:
+.scrapy
+# Sphinx documentation
+docs/_build/
+# PyBuilder
+.pybuilder/
+target/
+# Jupyter Notebook
+.ipynb_checkpoints
+# IPython
+profile_default/
+ipython_config.py
+# pyenv
+#   For a library or package, you might want to ignore these files since the code is
+#   intended to run in multiple environments; otherwise, check them in:
+# .python-version
+# pipenv
+#   According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control.
+#   However, in case of collaboration, if having platform-specific dependencies or dependencies
+#   having no cross-platform support, pipenv may install dependencies that don't work, or not
+#   install all needed dependencies.
+# Pipfile.lock
+# UV
+#   Similar to Pipfile.lock, it is generally recommended to include uv.lock in version control.
+#   This is especially recommended for binary packages to ensure reproducibility, and is more
+#   commonly ignored for libraries.
+# uv.lock
+# poetry
+#   Similar to Pipfile.lock, it is generally recommended to include poetry.lock in version control.
+#   This is especially recommended for binary packages to ensure reproducibility, and is more
+#   commonly ignored for libraries.
+#   https://python-poetry.org/docs/basic-usage/#commit-your-poetrylock-file-to-version-control
+# poetry.lock
+# poetry.toml
+# pdm
+#   Similar to Pipfile.lock, it is generally recommended to include pdm.lock in version control.
+#   pdm recommends including project-wide configuration in pdm.toml, but excluding .pdm-python.
+#   https://pdm-project.org/en/latest/usage/project/#working-with-version-control
+# pdm.lock
+# pdm.toml
+.pdm-python
+.pdm-build/
+# pixi
+#   Similar to Pipfile.lock, it is generally recommended to include pixi.lock in version control.
+# pixi.lock
+#   Pixi creates a virtual environment in the .pixi directory, just like venv module creates one
+#   in the .venv directory. It is recommended not to include this directory in version control.
+.pixi/*
+!.pixi/config.toml
+# PEP 582; used by e.g. github.com/David-OConnor/pyflow and github.com/pdm-project/pdm
+__pypackages__/
+# Celery stuff
+celerybeat-schedule*
+celerybeat.pid
+# Redis
+*.rdb
+*.aof
+*.pid
+# RabbitMQ
+mnesia/
+rabbitmq/
+rabbitmq-data/
+# ActiveMQ
+activemq-data/
+# SageMath parsed files
+*.sage.py
+# Environments
+.env
+.envrc
+.venv
+env/
+venv/
+ENV/
+env.bak/
+venv.bak/
+# Spyder project settings
+.spyderproject
+.spyproject
+# Rope project settings
+.ropeproject
+# mkdocs documentation
+/site
+# mypy
+.mypy_cache/
+.dmypy.json
+dmypy.json
+# Pyre type checker
+.pyre/
+# pytype static type analyzer
+.pytype/
+# Cython debug symbols
+cython_debug/
+# PyCharm
+#   JetBrains specific template is maintained in a separate JetBrains.gitignore that can
+#   be found at https://github.com/github/gitignore/blob/main/Global/JetBrains.gitignore
+#   and can be added to the global gitignore or merged into this file.  For a more nuclear
+#   option (not recommended) you can uncomment the following to ignore the entire idea folder.
+# .idea/
+# Abstra
+#   Abstra is an AI-powered process automation framework.
+#   Ignore directories containing user credentials, local state, and settings.
+#   Learn more at https://abstra.io/docs
+.abstra/
+# Visual Studio Code
+#   Visual Studio Code specific template is maintained in a separate VisualStudioCode.gitignore
+#   that can be found at https://github.com/github/gitignore/blob/main/Global/VisualStudioCode.gitignore
+#   and can be added to the global gitignore or merged into this file. However, if you prefer,
+#   you could uncomment the following to ignore the entire vscode folder
+# .vscode/
+# Temporary file for partial code execution
+tempCodeRunnerFile.py
+# Ruff stuff:
+.ruff_cache/
+# PyPI configuration file
+.pypirc
+# Marimo
+marimo/_static/
+marimo/_lsp/
+__marimo__/
+# Streamlit
+.streamlit/secrets.toml

client_secret.json ADDED Viewed

	@@ -0,0 +1,12 @@

+{
+  "web": {
+    "client_id": "769133159215-9gbq0l5v49kmclfcq7vbq7tutck0aphd.apps.googleusercontent.com",
+    "project_id": "root-isotope-497908-u0",
+    "auth_uri": "https://accounts.google.com/o/oauth2/auth",
+    "token_uri": "https://oauth2.googleapis.com/token",
+    "auth_provider_x509_cert_url": "https://www.googleapis.com/oauth2/v1/certs",
+    "client_secret": "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",
+    "redirect_uris": ["http://localhost:8000/auth/callback"],
+    "javascript_origins": ["http://localhost:8000"]
+  }
+}

fastapi_app.py ADDED Viewed

	@@ -0,0 +1,1138 @@

+import os
+import json
+import base64
+import hashlib
+import secrets
+import shutil
+import tempfile
+import threading
+from pathlib import Path
+from email.mime.text import MIMEText
+from datetime import datetime, timezone
+from fastapi.responses import FileResponse
+from fastapi import FastAPI, Request, HTTPException
+from fastapi.responses import RedirectResponse, HTMLResponse
+from pydantic import BaseModel
+from google_auth_oauthlib.flow import Flow
+from google.auth.transport.requests import Request as GoogleRequest
+from google.oauth2.credentials import Credentials
+from googleapiclient.discovery import build
+from googleapiclient.http import MediaFileUpload
+from gemini_transcript import TranscriptSummaryPipeline
+# ============================================================================
+# CONFIG
+# ============================================================================
+# NOTE(review): sets oauthlib's insecure-transport flag unless the environment
+# already defines it, presumably so the http://localhost redirect below works —
+# confirm it is overridden for any HTTPS deployment.
+os.environ.setdefault("OAUTHLIB_INSECURE_TRANSPORT", "1")
+# Directory containing this module; default file locations are resolved against it.
+BASE_DIR                = Path(__file__).resolve().parent
+# Path of the OAuth client-secrets JSON (env override: CLIENT_SECRETS).
+CLIENT_SECRETS          = os.getenv("CLIENT_SECRETS",          str(BASE_DIR / "client_secret.json"))
+# Where the authorized-user token is persisted (env override: GOOGLE_OAUTH_TOKEN_PATH).
+TOKEN_PATH              = os.getenv("GOOGLE_OAUTH_TOKEN_PATH",  str(BASE_DIR / "Google_oauth_token.json"))
+# Redirect URI handed to the OAuth flow (env override: REDIRECT_URI).
+REDIRECT_URI            = os.getenv("REDIRECT_URI",             "http://localhost:8000/auth/callback")
+# JSON file mapping pending OAuth ``state`` values to their PKCE verifiers.
+STATE_FILE              = BASE_DIR / "oauth_states.json"
+# Spreadsheet and Drive folder used when a caller does not name one explicitly.
+DEFAULT_SPREADSHEET_ID  = os.getenv("DEFAULT_SPREADSHEET_ID",  "1XA3vW_guHBT-ktkYvhktmUqcquECBe8exGZAoSQS3Ag")
+DEFAULT_DRIVE_FOLDER_ID = os.getenv("DEFAULT_DRIVE_FOLDER_ID", "1hI6dNXysR_2p9gHkDpsI-iwMExmy2hhR")
+# OAuth scopes requested for the Sheets, Gmail (send) and Drive (file) APIs.
+SCOPES = [
+    "https://www.googleapis.com/auth/spreadsheets",
+    "https://www.googleapis.com/auth/gmail.send",
+    "https://www.googleapis.com/auth/drive.file",
+]
+# Local output directory, created at import time, and the three files kept in it.
+OUTPUT_DIR      = BASE_DIR / "output"
+OUTPUT_DIR.mkdir(exist_ok=True)
+SUMMARY_FILE    = OUTPUT_DIR / "summary.txt"
+QA_FILE         = OUTPUT_DIR / "qa.txt"
+TRANSCRIPT_FILE = OUTPUT_DIR / "transcript.txt"
+# Header row for the job-log sheet; the trailing letters are the sheet columns
+# that the sheet-record helpers address by position.
+SHEETS_HEADERS = [
+    "Timestamp",             # A
+    "Job ID",                # B
+    "Video Title",           # C
+    "YouTube URL",           # D
+    "Model Used",            # E
+    "Status",                # F
+    "Summary Drive Link",    # G
+    "Q&A Drive Link",        # H
+    "Transcript Drive Link", # I
+    "Email Sent To",         # J
+    "Email Status",          # K
+    "Email Message ID",      # L
+    "Completed At",          # M
+    "Error",                 # N
+]
+# ============================================================================
+# IN-MEMORY JOB STORE
+# ============================================================================
+# Job records keyed by job id. Held in process memory only, so the contents
+# do not survive a restart.
+_jobs: dict[str, dict] = {}
+# Guards every read and write of ``_jobs`` made through the helpers below.
+_jobs_lock = threading.Lock()
+# Names of the pipeline steps tracked per job; each starts out as "pending".
+STEPS = [
+    "fetch_transcript",
+    "summarize",
+    "create_drive_folder",
+    "upload_summary",
+    "upload_qa",
+    "upload_transcript",
+    "send_email",
+    "log_sheet",
+]
+def _new_job(job_id: str, youtube_url: str, email_to: str) -> dict:
+    job = {
+        "job_id":       job_id,
+        "status":       "initiated",
+        "youtube_url":  youtube_url,
+        "email_to":     email_to,
+        "started_at":   datetime.now(timezone.utc).strftime("%Y-%m-%d %H:%M:%S UTC"),
+        "completed_at": None,
+        "steps":        {s: "pending" for s in STEPS},
+        "result":       None,
+        "error":        None,
+    }
+    with _jobs_lock:
+        _jobs[job_id] = job
+    return job
+def _update_job(job_id: str, **kwargs):
+    with _jobs_lock:
+        if job_id in _jobs:
+            _jobs[job_id].update(kwargs)
+def _set_step(job_id: str, step: str, state: str):
+    with _jobs_lock:
+        if job_id in _jobs:
+            _jobs[job_id]["steps"][step] = state
+# ============================================================================
+# APP
+# ============================================================================
+# The FastAPI application instance that every route below is registered on.
+app = FastAPI(title="Google Integration API", version="7.0.0")
+# ============================================================================
+# MODELS
+# ============================================================================
+# Request body carrying a YouTube URL and the address results are emailed to.
+class GenerateRequest(BaseModel):
+    youtube_url: str  # video to process
+    email_to: str     # recipient address
+# Request body for POST /email.
+class EmailRequest(BaseModel):
+    to: str       # recipient address
+    subject: str  # message subject line
+    body: str     # message text
+# Request body for POST /drive/create.
+class CreateFileRequest(BaseModel):
+    filename: str                         # name given to the Drive file
+    content: str                          # text content to upload
+    mimetype: str         = "text/plain"  # MIME type passed to the upload
+    folder_id: str | None = None          # optional parent folder id
+    make_public: bool     = True          # grant "anyone: reader" after upload
+# Request body shared by POST /sheets/write and POST /sheets/append.
+class SheetWriteRequest(BaseModel):
+    spreadsheet_id: str = DEFAULT_SPREADSHEET_ID  # target spreadsheet
+    range_name: str     = "Sheet1!A1"             # A1-notation range
+    values: list[list]                            # rows of cell values
+# Request body for POST /sheets/clear.
+class SheetClearRequest(BaseModel):
+    spreadsheet_id: str = DEFAULT_SPREADSHEET_ID  # target spreadsheet
+    range_name: str     = "Sheet1!A1:Z1000"       # A1-notation range to clear
+# ============================================================================
+# STATE PERSISTENCE
+# ============================================================================
+def load_states() -> dict:
+    try:
+        return json.loads(STATE_FILE.read_text())
+    except Exception:
+        return {}
+def save_states(states: dict) -> None:
+    STATE_FILE.write_text(json.dumps(states))
+# ============================================================================
+# AUTH
+# ============================================================================
+def create_flow() -> Flow:
+    if not os.path.exists(CLIENT_SECRETS):
+        raise FileNotFoundError(f"Missing client secret: {CLIENT_SECRETS}")
+    return Flow.from_client_secrets_file(
+        CLIENT_SECRETS, scopes=SCOPES, redirect_uri=REDIRECT_URI
+    )
+def load_credentials() -> Credentials | None:
+    if not os.path.exists(TOKEN_PATH):
+        return None
+    creds = Credentials.from_authorized_user_file(TOKEN_PATH, SCOPES)
+    if not creds.valid:
+        if creds.expired and creds.refresh_token:
+            creds.refresh(GoogleRequest())
+            Path(TOKEN_PATH).write_text(creds.to_json(), encoding="utf-8")
+        else:
+            return None
+    return creds
+def require_credentials() -> Credentials:
+    creds = load_credentials()
+    if creds is None:
+        raise HTTPException(
+            status_code=401,
+            detail="Not authenticated. Visit http://localhost:8000/auth/start",
+        )
+    return creds
+# ============================================================================
+# GMAIL
+# ============================================================================
+def _raw_message(to: str, subject: str, body: str) -> dict:
+    msg            = MIMEText(body)
+    msg["to"]      = to
+    msg["subject"] = subject
+    return {"raw": base64.urlsafe_b64encode(msg.as_bytes()).decode()}
+def send_email(to: str, subject: str, body: str, creds: Credentials = None) -> dict:
+    if creds is None:
+        creds = require_credentials()
+    return (
+        build("gmail", "v1", credentials=creds)
+        .users().messages()
+        .send(userId="me", body=_raw_message(to, subject, body))
+        .execute()
+    )
+# ============================================================================
+# DRIVE
+# ============================================================================
+def _direct_link(file_id: str) -> str:
+    return f"https://drive.google.com/uc?export=download&id={file_id}"
+def _make_public(service, file_id: str) -> dict:
+    service.permissions().create(
+        fileId=file_id,
+        body={"type": "anyone", "role": "reader"},
+    ).execute()
+    return service.files().get(
+        fileId=file_id,
+        fields="id,name,webViewLink,webContentLink,mimeType,size,createdTime,modifiedTime",
+    ).execute()
+def _drive(creds: Credentials):
+    return build("drive", "v3", credentials=creds)
+def create_drive_folder(
+    folder_name: str,
+    parent_folder_id: str | None = None,
+    creds: Credentials           = None,
+) -> str:
+    if creds is None:
+        creds = require_credentials()
+    meta = {
+        "name":     folder_name,
+        "mimeType": "application/vnd.google-apps.folder",
+    }
+    pid = parent_folder_id or DEFAULT_DRIVE_FOLDER_ID
+    if pid:
+        meta["parents"] = [pid]
+    folder = _drive(creds).files().create(body=meta, fields="id").execute()
+    return folder["id"]
+def upload_file_to_drive(
+    filepath: str,
+    creds: Credentials    = None,
+    folder_id: str | None = None,
+    make_public: bool     = True,
+) -> dict:
+    if creds is None:
+        creds = require_credentials()
+    svc   = _drive(creds)
+    meta  = {"name": os.path.basename(filepath)}
+    if folder_id:
+        meta["parents"] = [folder_id]
+    media   = MediaFileUpload(filepath, resumable=True)
+    file    = svc.files().create(
+        body=meta, media_body=media,
+        fields="id,name,webViewLink,webContentLink,mimeType,size",
+    ).execute()
+    file_id = file["id"]
+    if make_public:
+        file = _make_public(svc, file_id)
+    file["direct_download_link"] = _direct_link(file_id)
+    return file
+def create_file_on_drive(
+    filename: str,
+    content: str,
+    mimetype: str         = "text/plain",
+    creds: Credentials    = None,
+    folder_id: str | None = None,
+    make_public: bool     = True,
+) -> dict:
+    if creds is None:
+        creds = require_credentials()
+    svc  = _drive(creds)
+    meta = {"name": filename}
+    if folder_id:
+        meta["parents"] = [folder_id]
+    suffix   = Path(filename).suffix or ".txt"
+    tmp      = tempfile.NamedTemporaryFile(
+        mode="w", suffix=suffix, delete=False, encoding="utf-8"
+    )
+    tmp.write(content)
+    tmp.close()
+    tmp_path = tmp.name
+    try:
+        media = MediaFileUpload(tmp_path, mimetype=mimetype, resumable=True)
+        file  = svc.files().create(
+            body=meta, media_body=media,
+            fields="id,name,webViewLink,webContentLink,mimeType,size",
+        ).execute()
+        media._fd.close()
+    finally:
+        try:
+            os.unlink(tmp_path)
+        except Exception:
+            pass
+    file_id = file["id"]
+    if make_public:
+        file = _make_public(svc, file_id)
+    file["direct_download_link"] = _direct_link(file_id)
+    return file
+def get_drive_file(file_id: str, creds: Credentials = None) -> dict:
+    if creds is None:
+        creds = require_credentials()
+    file = _drive(creds).files().get(
+        fileId=file_id,
+        fields="id,name,webViewLink,webContentLink,mimeType,size,createdTime,modifiedTime",
+    ).execute()
+    file["direct_download_link"] = _direct_link(file_id)
+    return file
+def list_drive_files(
+    folder_id: str | None = None,
+    page_size: int        = 20,
+    creds: Credentials    = None,
+) -> list:
+    if creds is None:
+        creds = require_credentials()
+    fid   = folder_id or DEFAULT_DRIVE_FOLDER_ID
+    query = f"'{fid}' in parents and trashed=false" if fid else "trashed=false"
+    files = _drive(creds).files().list(
+        q=query, pageSize=page_size,
+        fields="files(id,name,webViewLink,webContentLink,mimeType,size,createdTime)",
+    ).execute().get("files", [])
+    for f in files:
+        f["direct_download_link"] = _direct_link(f["id"])
+    return files
+# ============================================================================
+# SHEETS
+# ============================================================================
+def _sheets(creds: Credentials):
+    return build("sheets", "v4", credentials=creds)
+def read_sheet(
+    spreadsheet_id: str,
+    range_name: str    = "Sheet1!A1:Z1000",
+    creds: Credentials = None,
+) -> list:
+    if creds is None:
+        creds = require_credentials()
+    return (
+        _sheets(creds).spreadsheets().values()
+        .get(spreadsheetId=spreadsheet_id, range=range_name)
+        .execute().get("values", [])
+    )
+def write_sheet(
+    spreadsheet_id: str,
+    range_name: str,
+    values: list[list],
+    creds: Credentials = None,
+) -> dict:
+    if creds is None:
+        creds = require_credentials()
+    return (
+        _sheets(creds).spreadsheets().values()
+        .update(
+            spreadsheetId=spreadsheet_id,
+            range=range_name,
+            valueInputOption="USER_ENTERED",
+            body={"values": values},
+        ).execute()
+    )
+def append_sheet(
+    spreadsheet_id: str,
+    range_name: str,
+    values: list[list],
+    creds: Credentials = None,
+) -> dict:
+    if creds is None:
+        creds = require_credentials()
+    return (
+        _sheets(creds).spreadsheets().values()
+        .append(
+            spreadsheetId=spreadsheet_id,
+            range=range_name,
+            valueInputOption="USER_ENTERED",
+            insertDataOption="INSERT_ROWS",
+            body={"values": values},
+        ).execute()
+    )
+def clear_sheet(
+    spreadsheet_id: str,
+    range_name: str,
+    creds: Credentials = None,
+) -> dict:
+    if creds is None:
+        creds = require_credentials()
+    return (
+        _sheets(creds).spreadsheets().values()
+        .clear(spreadsheetId=spreadsheet_id, range=range_name)
+        .execute()
+    )
+def get_sheet_metadata(
+    spreadsheet_id: str,
+    creds: Credentials = None,
+) -> dict:
+    if creds is None:
+        creds = require_credentials()
+    info = _sheets(creds).spreadsheets().get(spreadsheetId=spreadsheet_id).execute()
+    return {
+        "spreadsheet_id": info["spreadsheetId"],
+        "title":          info["properties"]["title"],
+        "url":            f"https://docs.google.com/spreadsheets/d/{info['spreadsheetId']}",
+        "sheets": [
+            {
+                "sheet_id": s["properties"]["sheetId"],
+                "title":    s["properties"]["title"],
+                "rows":     s["properties"]["gridProperties"]["rowCount"],
+                "cols":     s["properties"]["gridProperties"]["columnCount"],
+            }
+            for s in info.get("sheets", [])
+        ],
+    }
+def append_row_to_sheet(
+    values: list,
+    spreadsheet_id: str = DEFAULT_SPREADSHEET_ID,
+    range_name: str     = "Sheet1!A1",
+    creds: Credentials  = None,
+):
+    if not spreadsheet_id:
+        return None
+    return append_sheet(spreadsheet_id, range_name, [values], creds=creds)
+# ============================================================================
+# SHEETS — JOB RECORD HELPERS
+# ============================================================================
+def ensure_sheet_header(creds: Credentials = None) -> None:
+    if not DEFAULT_SPREADSHEET_ID:
+        return
+    try:
+        existing = read_sheet(DEFAULT_SPREADSHEET_ID, "Sheet1!A1:Z1", creds=creds)
+        if not existing:
+            write_sheet(
+                DEFAULT_SPREADSHEET_ID,
+                "Sheet1!A1",
+                [SHEETS_HEADERS],
+                creds=creds,
+            )
+    except Exception as exc:
+        print(f"[WARN] Could not write sheet header: {exc}")
+def _find_job_row(job_id: str, creds: Credentials) -> int | None:
+    """Find 1-based row number of job_id in column B."""
+    try:
+        rows = read_sheet(DEFAULT_SPREADSHEET_ID, "Sheet1!B:B", creds=creds)
+        for i, row in enumerate(rows, start=1):
+            if row and row[0] == job_id:
+                return i
+    except Exception:
+        pass
+    return None
+def _create_sheet_record(
+    job_id: str,
+    timestamp: str,
+    youtube_url: str,
+    email_to: str,
+    creds: Credentials,
+) -> None:
+    """Insert initial row when job starts."""
+    try:
+        row = [
+            timestamp,   # A — Timestamp
+            job_id,      # B — Job ID
+            "",          # C — Video Title
+            youtube_url, # D — YouTube URL
+            "",          # E — Model Used
+            "initiated", # F — Status
+            "",          # G — Summary Link
+            "",          # H — Q&A Link
+            "",          # I — Transcript Link
+            email_to,    # J — Email Sent To
+            "",          # K — Email Status
+            "",          # L — Email Message ID
+            "",          # M — Completed At
+            "",          # N — Error
+        ]
+        append_sheet(DEFAULT_SPREADSHEET_ID, "Sheet1!A1", [row], creds=creds)
+    except Exception as exc:
+        print(f"[WARN] Could not create sheet record: {exc}")
+def _update_sheet_record(
+    job_id: str,
+    creds: Credentials,
+    video_title: str     = "",
+    model_used: str      = "",
+    status: str          = "",
+    summary_link: str    = "",
+    qa_link: str         = "",
+    transcript_link: str = "",
+    email_status: str    = "",
+    email_msg_id: str    = "",
+    completed_at: str    = "",
+    error: str           = "",
+) -> None:
+    """Update the sheet row belonging to ``job_id`` in place.
+
+    The row is located through ``_find_job_row``, read back, merged with the
+    arguments and rewritten over columns A–N. An argument left as the empty
+    string keeps the cell's current value, so this function cannot blank out
+    a cell. Timestamp, YouTube URL and Email Sent To are always carried over
+    from the existing row.
+
+    Does nothing when no default spreadsheet is configured or the row is not
+    found. Any failure is reported as a warning on stdout rather than raised.
+    """
+    if not DEFAULT_SPREADSHEET_ID:
+        return
+    try:
+        row_num = _find_job_row(job_id, creds)
+        if row_num is None:
+            print(f"[WARN] Row for job {job_id} not found in sheet.")
+            return
+        # Read existing to preserve immutable columns
+        existing     = read_sheet(
+            DEFAULT_SPREADSHEET_ID,
+            f"Sheet1!A{row_num}:N{row_num}",
+            creds=creds,
+        )
+        # Fall back to 14 blank cells when the read returns no rows.
+        existing_row = existing[0] if existing else [""] * 14
+        def _v(new: str, idx: int) -> str:
+            # Prefer the new value; otherwise keep the existing cell. The length
+            # check covers rows that come back with fewer than 14 cells.
+            return new if new != "" else (
+                existing_row[idx] if len(existing_row) > idx else ""
+            )
+        updated_row = [
+            _v("",              0),  # A — Timestamp       (immutable)
+            job_id,                  # B — Job ID          (immutable)
+            _v(video_title,     2),  # C — Video Title
+            _v("",              3),  # D — YouTube URL     (immutable)
+            _v(model_used,      4),  # E — Model Used
+            _v(status,          5),  # F — Status
+            _v(summary_link,    6),  # G — Summary Link
+            _v(qa_link,         7),  # H — Q&A Link
+            _v(transcript_link, 8),  # I — Transcript Link
+            _v("",              9),  # J — Email Sent To   (immutable)
+            _v(email_status,   10),  # K — Email Status
+            _v(email_msg_id,   11),  # L — Email Message ID
+            _v(completed_at,   12),  # M — Completed At
+            _v(error,          13),  # N — Error
+        ]
+        write_sheet(
+            DEFAULT_SPREADSHEET_ID,
+            f"Sheet1!A{row_num}:N{row_num}",
+            [updated_row],
+            creds=creds,
+        )
+    except Exception as exc:
+        print(f"[WARN] Could not update sheet record: {exc}")
+# ============================================================================
+# STARTUP
+# ============================================================================
+@app.on_event("startup")
+def on_startup():
+    creds = load_credentials()
+    if creds:
+        ensure_sheet_header(creds=creds)
+# ============================================================================
+# BASIC ROUTES
+# ============================================================================
+@app.get("/")
+def root():
+     return FileResponse("index.html")
+@app.get("/health")
+def health():
+    creds = load_credentials()
+    return {
+        "status":        "ok",
+        "version":       "7.0.0",
+        "authenticated": creds is not None,
+        "endpoints": {
+            "auth":   ["/auth/start", "/auth/status", "/auth/revoke"],
+            "gmail":  ["/email"],
+            "drive":  ["/drive/create", "/drive/file/{id}", "/drive/files"],
+            "sheets": ["/sheets/info", "/sheets/read", "/sheets/write",
+                       "/sheets/append", "/sheets/clear"],
+            "jobs":   ["/generate", "/status/{job_id}", "/jobs"],
+            "misc":   ["/health"],
+        },
+    }
+# ============================================================================
+# OAUTH
+# ============================================================================
+@app.get("/auth/start")
+def auth_start():
+    flow        = create_flow()
+    verifier    = secrets.token_urlsafe(64)
+    challenge   = (
+        base64.urlsafe_b64encode(hashlib.sha256(verifier.encode()).digest())
+        .rstrip(b"=").decode()
+    )
+    auth_url, state = flow.authorization_url(
+        access_type="offline",
+        include_granted_scopes="true",
+        prompt="consent",
+        code_challenge=challenge,
+        code_challenge_method="S256",
+    )
+    states        = load_states()
+    states[state] = verifier
+    save_states(states)
+    return RedirectResponse(auth_url)
+@app.get("/auth/callback")
+def auth_callback(request: Request):
+    state  = request.query_params.get("state", "")
+    states = load_states()
+    if state not in states:
+        raise HTTPException(status_code=400, detail="Invalid or expired OAuth state.")
+    verifier = states.pop(state)
+    save_states(states)
+    flow = create_flow()
+    flow.fetch_token(
+        authorization_response=str(request.url).replace("https://", "http://"),
+        code_verifier=verifier,
+    )
+    Path(TOKEN_PATH).write_text(flow.credentials.to_json(), encoding="utf-8")
+    return HTMLResponse("""
+        <html>
+          <body style="font-family:sans-serif;text-align:center;padding-top:80px;
+                       background:#f0fdf4;color:#166534">
+            <h1>✅ Authorization Successful</h1>
+            <p>Gmail, Drive and Sheets are now connected.</p>
+            <p>You can close this tab.</p>
+          </body>
+        </html>
+    """)
+@app.get("/auth/status")
+def auth_status():
+    creds = load_credentials()
+    return {"authenticated": creds is not None}
+@app.delete("/auth/revoke")
+def auth_revoke():
+    for p in [Path(TOKEN_PATH), STATE_FILE]:
+        if p.exists():
+            p.unlink()
+    return {"status": "revoked"}
+# ============================================================================
+# EMAIL
+# ============================================================================
+@app.post("/email")
+def email(payload: EmailRequest):
+    creds  = require_credentials()
+    result = send_email(payload.to, payload.subject, payload.body, creds=creds)
+    return {"status": "sent", "message_id": result.get("id")}
+# ============================================================================
+# DRIVE ROUTES
+# ============================================================================
+@app.post("/drive/create")
+def drive_create(payload: CreateFileRequest):
+    creds = require_credentials()
+    file  = create_file_on_drive(
+        filename=payload.filename,
+        content=payload.content,
+        mimetype=payload.mimetype,
+        creds=creds,
+        folder_id=payload.folder_id,
+        make_public=payload.make_public,
+    )
+    return {
+        "file_id":              file["id"],
+        "name":                 file["name"],
+        "mime_type":            file.get("mimeType"),
+        "web_view_link":        file.get("webViewLink"),
+        "direct_download_link": file["direct_download_link"],
+    }
+@app.get("/drive/file/{file_id}")
+def drive_get_file(file_id: str):
+    creds = require_credentials()
+    file  = get_drive_file(file_id, creds=creds)
+    return {
+        "file_id":              file["id"],
+        "name":                 file["name"],
+        "mime_type":            file.get("mimeType"),
+        "size_bytes":           file.get("size"),
+        "created":              file.get("createdTime"),
+        "modified":             file.get("modifiedTime"),
+        "web_view_link":        file.get("webViewLink"),
+        "direct_download_link": file["direct_download_link"],
+    }
+@app.get("/drive/files")
+def drive_list_files(folder_id: str = "", limit: int = 20):
+    creds = require_credentials()
+    files = list_drive_files(
+        folder_id=folder_id or None,
+        page_size=limit,
+        creds=creds,
+    )
+    return {"count": len(files), "files": files}
+# ============================================================================
+# SHEETS ROUTES
+# ============================================================================
+@app.get("/sheets/info")
+def sheets_info(spreadsheet_id: str = DEFAULT_SPREADSHEET_ID):
+    if not spreadsheet_id:
+        raise HTTPException(status_code=400, detail="spreadsheet_id is required.")
+    return get_sheet_metadata(spreadsheet_id, creds=require_credentials())
+@app.get("/sheets/read")
+def sheets_read(
+    spreadsheet_id: str = DEFAULT_SPREADSHEET_ID,
+    range_name: str     = "Sheet1!A1:Z1000",
+):
+    if not spreadsheet_id:
+        raise HTTPException(status_code=400, detail="spreadsheet_id is required.")
+    rows = read_sheet(spreadsheet_id, range_name, creds=require_credentials())
+    return {
+        "spreadsheet_id": spreadsheet_id,
+        "range":          range_name,
+        "row_count":      len(rows),
+        "values":         rows,
+    }
+@app.post("/sheets/write")
+def sheets_write(payload: SheetWriteRequest):
+    if not payload.spreadsheet_id:
+        raise HTTPException(status_code=400, detail="spreadsheet_id is required.")
+    result = write_sheet(
+        payload.spreadsheet_id, payload.range_name, payload.values,
+        creds=require_credentials(),
+    )
+    return {
+        "status":          "written",
+        "updated_range":   result.get("updatedRange"),
+        "updated_rows":    result.get("updatedRows"),
+        "updated_columns": result.get("updatedColumns"),
+        "updated_cells":   result.get("updatedCells"),
+    }
+@app.post("/sheets/append")
+def sheets_append(payload: SheetWriteRequest):
+    if not payload.spreadsheet_id:
+        raise HTTPException(status_code=400, detail="spreadsheet_id is required.")
+    result  = append_sheet(
+        payload.spreadsheet_id, payload.range_name, payload.values,
+        creds=require_credentials(),
+    )
+    updates = result.get("updates", {})
+    return {
+        "status":        "appended",
+        "updated_range": updates.get("updatedRange"),
+        "updated_rows":  updates.get("updatedRows"),
+        "updated_cells": updates.get("updatedCells"),
+    }
+@app.post("/sheets/clear")
+def sheets_clear(payload: SheetClearRequest):
+    if not payload.spreadsheet_id:
+        raise HTTPException(status_code=400, detail="spreadsheet_id is required.")
+    result = clear_sheet(
+        payload.spreadsheet_id, payload.range_name,
+        creds=require_credentials(),
+    )
+    return {
+        "status":         "cleared",
+        "cleared_range":  result.get("clearedRange"),
+        "spreadsheet_id": result.get("spreadsheetId"),
+    }
+# ============================================================================
+# JOB STATUS ROUTES
+# ============================================================================
+@app.get("/status/{job_id}")
+def get_status(job_id: str):
+    with _jobs_lock:
+        job = _jobs.get(job_id)
+    if job is None:
+        raise HTTPException(status_code=404, detail=f"Job '{job_id}' not found.")
+    return job
+@app.get("/jobs")
+def list_jobs():
+    with _jobs_lock:
+        return {
+            "total": len(_jobs),
+            "jobs": [
+                {
+                    "job_id":       j["job_id"],
+                    "status":       j["status"],
+                    "youtube_url":  j["youtube_url"],
+                    "started_at":   j["started_at"],
+                    "completed_at": j["completed_at"],
+                }
+                for j in _jobs.values()
+            ],
+        }
+# ============================================================================
+# PIPELINE BACKGROUND WORKER
+# ============================================================================
+def _upload_with_title(
+    local_file: Path,
+    drive_name: str,
+    step_key: str,
+    job_id: str,
+    folder_id: str,
+    creds: Credentials,
+) -> dict:
+    """Copy file with video-title name, upload to Drive, clean up."""
+    _set_step(job_id, step_key, "running")
+    try:
+        tmp_path = local_file.parent / drive_name
+        if tmp_path.exists():
+            tmp_path.unlink()
+        shutil.copy2(local_file, tmp_path)
+        result = upload_file_to_drive(
+            str(tmp_path), creds=creds,
+            folder_id=folder_id, make_public=True,
+        )
+        tmp_path.unlink()
+        _set_step(job_id, step_key, "done")
+        return result
+    except Exception as exc:
+        _set_step(job_id, step_key, "failed")
+        return {"error": str(exc)}
+def _run_pipeline(job_id: str, youtube_url: str, email_to: str):
+    creds     = load_credentials()
+    timestamp = datetime.now(timezone.utc).strftime("%Y-%m-%d %H:%M:%S UTC")
+    # Create initial sheet row immediately
+    _create_sheet_record(
+        job_id=job_id,
+        timestamp=timestamp,
+        youtube_url=youtube_url,
+        email_to=email_to,
+        creds=creds,
+    )
+    try:
+        # ── STEP 1: Fetch transcript ─────────────────────────────────────
+        _update_job(job_id, status="fetching_transcript")
+        _set_step(job_id, "fetch_transcript", "running")
+        _update_sheet_record(job_id, creds, status="fetching_transcript")
+        try:
+            pipeline   = TranscriptSummaryPipeline(
+                youtube_url=youtube_url,
+                languages=["en", "en-US", "en-GB"],
+            )
+            transcript = pipeline.fetcher.run()
+            _set_step(job_id, "fetch_transcript", "done")
+            _update_sheet_record(
+                job_id, creds,
+                video_title=pipeline.video_title,
+                status="transcript_ready",
+            )
+        except Exception as exc:
+            _set_step(job_id, "fetch_transcript", "failed")
+            raise RuntimeError(f"Transcript fetch failed: {exc}")
+        video_title = pipeline.video_title
+        # ── STEP 2: Summarize ────────────────────────────────────────────
+        _update_job(job_id, status="summarizing")
+        _set_step(job_id, "summarize", "running")
+        _update_sheet_record(job_id, creds, status="summarizing")
+        try:
+            summary, qa, model_used = pipeline.summarizer.run(transcript)
+            _set_step(job_id, "summarize", "done")
+            _update_sheet_record(
+                job_id, creds,
+                model_used=model_used,
+                status="summarized",
+            )
+        except Exception as exc:
+            _set_step(job_id, "summarize", "failed")
+            raise RuntimeError(f"Summarization failed: {exc}")
+        # ── STEP 3: Create Drive folder ──────────────────────────────────
+        _update_job(job_id, status="creating_drive_folder")
+        _set_step(job_id, "create_drive_folder", "running")
+        _update_sheet_record(job_id, creds, status="creating_drive_folder")
+        try:
+            folder_id = create_drive_folder(video_title, creds=creds)
+            _set_step(job_id, "create_drive_folder", "done")
+        except Exception as exc:
+            _set_step(job_id, "create_drive_folder", "failed")
+            raise RuntimeError(f"Drive folder creation failed: {exc}")
+        # ── STEP 4–6: Upload files ───────────────────────────────────────
+        _update_job(job_id, status="uploading_drive")
+        _update_sheet_record(job_id, creds, status="uploading_to_drive")
+        summary_drive    = _upload_with_title(
+            SUMMARY_FILE,    f"{video_title}__summary.txt",    "upload_summary",
+            job_id, folder_id, creds,
+        )
+        qa_drive         = _upload_with_title(
+            QA_FILE,         f"{video_title}__qa.txt",         "upload_qa",
+            job_id, folder_id, creds,
+        )
+        transcript_drive = _upload_with_title(
+            TRANSCRIPT_FILE, f"{video_title}__transcript.txt", "upload_transcript",
+            job_id, folder_id, creds,
+        )
+        summary_link    = summary_drive.get("direct_download_link",    "N/A")
+        qa_link         = qa_drive.get("direct_download_link",         "N/A")
+        transcript_link = transcript_drive.get("direct_download_link", "N/A")
+        _update_sheet_record(
+            job_id, creds,
+            status="drive_uploaded",
+            summary_link=summary_link,
+            qa_link=qa_link,
+            transcript_link=transcript_link,
+        )
+        # ── STEP 7: Send email ───────────────────────────────────────────
+        _update_job(job_id, status="sending_email")
+        _set_step(job_id, "send_email", "running")
+        _update_sheet_record(job_id, creds, status="sending_email")
+        email_subject = f"✅ YouTube Summary Ready — {video_title}"
+        email_body    = f"""Hello,
+Your YouTube video has been processed successfully.
+🎥 Title      : {video_title}
+🔗 Video URL  : {youtube_url}
+📄 Summary    : {summary_link}
+❓ Q&A         : {qa_link}
+📝 Transcript : {transcript_link}
+────────────────────────────────
+Model Used : {model_used}
+────────────────────────────────
+Regards,
+Google Integration API
+"""
+        try:
+            email_result = send_email(
+                to=email_to, subject=email_subject,
+                body=email_body, creds=creds,
+            )
+            email_status = "sent"
+            email_msg_id = email_result.get("id", "")
+            _set_step(job_id, "send_email", "done")
+        except Exception as exc:
+            email_status = f"failed: {exc}"
+            email_msg_id = ""
+            _set_step(job_id, "send_email", "failed")
+        # ── STEP 8: Final sheet update ───────────────────────────────────
+        _update_job(job_id, status="logging_sheet")
+        _set_step(job_id, "log_sheet", "running")
+        completed_at = datetime.now(timezone.utc).strftime("%Y-%m-%d %H:%M:%S UTC")
+        try:
+            _update_sheet_record(
+                job_id, creds,
+                status="completed",
+                email_status=email_status,
+                email_msg_id=email_msg_id,
+                completed_at=completed_at,
+            )
+            _set_step(job_id, "log_sheet", "done")
+            sheets_status = "logged"
+        except Exception as exc:
+            sheets_status = f"failed: {exc}"
+            _set_step(job_id, "log_sheet", "failed")
+        # ── COMPLETE ─────────────────────────────────────────────────────
+        _update_job(
+            job_id,
+            status="completed",
+            completed_at=completed_at,
+            result={
+                "video_title": video_title,
+                "youtube_url": youtube_url,
+                "model_used":  model_used,
+                "drive": {
+                    "folder_id":  folder_id,
+                    "summary":    {
+                        "web_view_link":        summary_drive.get("webViewLink"),
+                        "direct_download_link": summary_link,
+                    },
+                    "qa":         {
+                        "web_view_link":        qa_drive.get("webViewLink"),
+                        "direct_download_link": qa_link,
+                    },
+                    "transcript": {
+                        "web_view_link":        transcript_drive.get("webViewLink"),
+                        "direct_download_link": transcript_link,
+                    },
+                },
+                "email":  {"status": email_status,  "message_id": email_msg_id},
+                "sheets": {"status": sheets_status},
+            },
+        )
+    except Exception as exc:
+        completed_at = datetime.now(timezone.utc).strftime("%Y-%m-%d %H:%M:%S UTC")
+        _update_job(
+            job_id,
+            status="failed",
+            completed_at=completed_at,
+            error=str(exc),
+        )
+        _update_sheet_record(
+            job_id, creds,
+            status="failed",
+            completed_at=completed_at,
+            error=str(exc),
+        )
+# ============================================================================
+# GENERATE ROUTE
+# ============================================================================
+@app.post("/generate")
+def generate(payload: GenerateRequest):
+    """
+    Kick off full pipeline in background thread.
+    Returns job_id immediately — poll GET /status/{job_id} for progress.
+    """
+    require_credentials()
+    job_id = secrets.token_hex(8)
+    _new_job(job_id, payload.youtube_url, payload.email_to)
+    thread = threading.Thread(
+        target=_run_pipeline,
+        args=(job_id, payload.youtube_url, payload.email_to),
+        daemon=True,
+    )
+    thread.start()
+    return {
+        "job_id":     job_id,
+        "status":     "initiated",
+        "poll_url":   f"/status/{job_id}",
+        "started_at": _jobs[job_id]["started_at"],
+    }
+# ============================================================================
+# RUN
+# ============================================================================
+# Script entry point: serve the app with uvicorn when this file is run directly.
+if __name__ == "__main__":
+    # Imported here so uvicorn is only needed when running as a script.
+    import uvicorn
+    # NOTE(review): host 0.0.0.0 with reload=True reads like a development
+    # configuration - confirm it is not the production launch path.
+    uvicorn.run("fastapi_app:app", host="0.0.0.0", port=8000, reload=True)

gemini_transcript.py ADDED Viewed

	@@ -0,0 +1,445 @@

+#!/usr/bin/env python3
+from __future__ import annotations
+import json
+import logging
+import os
+import re
+import sys
+import time
+from pathlib import Path
+from typing import Optional, List
+from urllib.parse import urlparse, parse_qs
+# from google import genai
+from youtube_transcript_api import (
+    YouTubeTranscriptApi,
+    TranscriptsDisabled,
+    NoTranscriptFound,
+    VideoUnavailable,
+)
+# ============================================================================
+# CONFIG
+# ============================================================================
+BASE_DIR        = Path(__file__).resolve().parent
+OUTPUT_DIR      = BASE_DIR / "output"
+OUTPUT_DIR.mkdir(exist_ok=True)
+TRANSCRIPT_FILE = OUTPUT_DIR / "transcript.txt"
+SUMMARY_FILE    = OUTPUT_DIR / "summary.txt"
+QA_FILE         = OUTPUT_DIR / "qa.txt"
+GEMINI_API_KEY  = "AIzaSyCNz5wQAyJ65kNRkwr0-1A-_Z6-lQzdcyc"
+GEMINI_MODELS = [
+    "gemini-2.5-flash",
+    "gemini-2.5-flash-lite",
+    "gemini-2.5-pro",
+]
+POLLING_CONFIG = {
+    "attempt_1": {"wait_before": 0,    "description": "Immediate attempt on trigger"},
+    "attempt_2": {"wait_before": 300,  "description": "Retry after 5 minutes"},
+    "attempt_3": {"wait_before": 900,  "description": "Retry after 15 minutes"},
+    "attempt_4": {"wait_before": 1800, "description": "Retry after 30 minutes"},
+    "attempt_5": {"wait_before": 3600, "description": "Retry after 1 hour"},
+    "attempt_6": {"wait_before": 3600, "description": "Retry after 2 hours total"},
+    "attempt_7": {"wait_before": 3600, "description": "Retry after 3 hours total"},
+    "attempt_8": {"wait_before": 3600, "description": "Retry after 4 hours total"},
+    "attempt_9": {"wait_before": 3600, "description": "Final attempt at 5 hours total"},
+}
+# System instruction sent with every summarization request.
+# The model is asked to separate the summary from the Q&A with a line that
+# contains only "!!!!!"; GeminiSummarizer._split looks for that separator,
+# so the marker and the splitting patterns must be changed together.
+# NOTE(review): the format example labels answers "Answer:" while the rules
+# mention "Q1/A1" numbering - confirm which form is intended.
+SYSTEM_PROMPT = """
+You are an expert content summarizer and educator.
+Produce the full output containing exactly two parts separated by a line with only 5 exclamation marks:
+!!!!!
+--- PART 1: SUMMARY ---
+Write a detailed, well-structured summary of the entire content.
+Use the following structure:
+## Overview
+A 3-5 sentence high-level overview of the entire content.
+## Key Topics Covered
+List the main topics discussed, each with a brief explanation.
+## Detailed Summary
+A thorough section-by-section breakdown of the content in the order it was presented.
+Use subheadings for each major section or topic shift.
+## Key Takeaways
+A bullet list of the most important insights, facts, or conclusions from the content.
+---
+!!!!!
+--- PART 2: Q&A ---
+Generate a comprehensive Q&A section based on the content.
+Format each entry exactly like this:
+Q1: [First question]
+Answer: [Detailed answer]
+Q2: [Second question]
+Answer: [Detailed answer]
+Q3: [Third question]
+Answer: [Detailed answer]
+... and so on until all important questions are covered.
+Rules:
+- Number every question and answer with matching numbers (Q1/A1, Q2/A2, etc.)
+- Each answer must be detailed and self-contained
+- Cover all major topics, concepts, facts, and insights from the content
+- Minimum 10 Q&A pairs, more if the content is rich
+- Do NOT use bullet points inside answers — write in full sentences
+---
+"""
+# ============================================================================
+# LOGGING
+# ============================================================================
+# Configure logging when the module is imported: INFO level, with a
+# "time | level | message" line format.
+logging.basicConfig(
+    level=logging.INFO,
+    format="%(asctime)s | %(levelname)s | %(message)s",
+)
+# Named logger shared by the fetcher, summarizer and pipeline classes below.
+logger = logging.getLogger("gemini_pipeline")
+# ============================================================================
+# HELPERS
+# ============================================================================
+def _format_duration(seconds: int) -> str:
+    if seconds < 60:
+        return f"{seconds}s"
+    if seconds < 3600:
+        return f"{seconds // 60}m"
+    h = seconds // 3600
+    m = (seconds % 3600) // 60
+    return f"{h}h {m}m" if m else f"{h}h"
+def fetch_video_title(video_id: str) -> str:
+    """Fetch YouTube video title via oembed — no API key needed."""
+    try:
+        import urllib.request
+        url = (
+            f"https://www.youtube.com/oembed"
+            f"?url=https://www.youtube.com/watch?v={video_id}&format=json"
+        )
+        with urllib.request.urlopen(url, timeout=10) as resp:
+            data  = json.loads(resp.read().decode())
+            title = data.get("title", "")
+            safe  = re.sub(r'[\\/*?:"<>|]', "", title)
+            safe  = re.sub(r"\s+", "_", safe.strip())
+            return safe[:80] or video_id
+    except Exception:
+        return video_id
+# ============================================================================
+# YOUTUBE TRANSCRIPT FETCHER
+# ============================================================================
+class YouTubeTranscriptFetcher:
+    """Fetches YouTube transcript with polling retry for new uploads."""
+    def __init__(
+        self,
+        youtube_url: str,
+        output_file: Path              = TRANSCRIPT_FILE,
+        languages: Optional[List[str]] = None,
+        polling_config: dict           = None,
+    ):
+        self.youtube_url    = youtube_url
+        self.output_file    = Path(output_file)
+        self.languages      = languages or ["en", "en-US", "en-GB"]
+        self.polling_config = polling_config or POLLING_CONFIG
+        self.video_id       = self._extract_video_id(youtube_url)
+        self.api            = YouTubeTranscriptApi()
+    @staticmethod
+    def _extract_video_id(url: str) -> str:
+        parsed = urlparse(url)
+        if parsed.hostname == "youtu.be":
+            return parsed.path.lstrip("/").split("?")[0]
+        if parsed.hostname in ("youtube.com", "www.youtube.com", "m.youtube.com"):
+            path_parts = parsed.path.strip("/").split("/")
+            if path_parts[0] in ("live", "shorts", "embed") and len(path_parts) >= 2:
+                return path_parts[1].split("?")[0]
+            params = parse_qs(parsed.query)
+            if "v" in params:
+                return params["v"][0]
+            raise ValueError(f"Could not extract video ID from URL: {url}")
+        raise ValueError(f"Unsupported YouTube URL: {url}")
+    def _fetch_once(self) -> str:
+        transcript = self.api.fetch(self.video_id, languages=self.languages)
+        return " ".join(item.text for item in transcript)
+    def _save(self, text: str) -> None:
+        self.output_file.parent.mkdir(parents=True, exist_ok=True)
+        self.output_file.write_text(text, encoding="utf-8")
+    def run(self) -> str:
+        logger.info("Video ID    : %s", self.video_id)
+        logger.info("Output file : %s", self.output_file)
+        logger.info("Total polling attempts: %d", len(self.polling_config))
+        attempts = list(self.polling_config.items())
+        for idx, (attempt_key, config) in enumerate(attempts, start=1):
+            wait_before = config["wait_before"]
+            description = config["description"]
+            if wait_before > 0:
+                logger.info(
+                    "[%d/%d] %s — waiting %s before retry...",
+                    idx, len(attempts), description,
+                    _format_duration(wait_before),
+                )
+                time.sleep(wait_before)
+            logger.info(
+                "[%d/%d] %s — fetching transcript now...",
+                idx, len(attempts), description,
+            )
+            try:
+                text = self._fetch_once()
+                self._save(text)
+                logger.info(
+                    "[%d/%d] ✅ Transcript fetched — %d characters",
+                    idx, len(attempts), len(text),
+                )
+                return text
+            except TranscriptsDisabled as e:
+                logger.warning("[%d/%d] Transcripts disabled: %s", idx, len(attempts), e)
+                raise  # no point retrying
+            except VideoUnavailable as e:
+                logger.warning("[%d/%d] Video unavailable: %s", idx, len(attempts), e)
+            except NoTranscriptFound as e:
+                logger.warning("[%d/%d] No transcript yet: %s", idx, len(attempts), e)
+            except KeyboardInterrupt:
+                logger.warning("Interrupted by user.")
+                raise
+            except Exception as e:
+                logger.exception("[%d/%d] Unexpected error: %s", idx, len(attempts), e)
+            if idx < len(attempts):
+                next_cfg = attempts[idx][1]
+                logger.info(
+                    "[%d/%d] Will retry in %s (%s)",
+                    idx, len(attempts),
+                    _format_duration(next_cfg["wait_before"]),
+                    next_cfg["description"],
+                )
+            else:
+                logger.error("All %d polling attempts exhausted.", len(attempts))
+        raise RuntimeError(
+            f"Transcript not available after {len(attempts)} attempts (~5 hours). "
+            f"Video ID: {self.video_id}"
+        )
+# ============================================================================
+# GEMINI SUMMARIZER
+# ============================================================================
+class GeminiSummarizer:
+    """Sends transcript to Gemini with model fallback + per-model retry."""
+    # Retry config
+    MAX_RETRIES  = 5
+    BASE_WAIT    = 10    # seconds
+    MAX_WAIT     = 120   # seconds cap
+    # Errors → retry same model with backoff
+    RETRYABLE    = ["503", "502", "500", "UNAVAILABLE", "SERVICE_UNAVAILABLE"]
+    # Errors → skip to next model immediately
+    SKIP_TO_NEXT = ["429", "RESOURCE_EXHAUSTED", "quota", "404", "NOT_FOUND"]
+    def __init__(
+        self,
+        api_key: str       = GEMINI_API_KEY,
+        models: list       = None,
+        summary_file: Path = SUMMARY_FILE,
+        qa_file: Path      = QA_FILE,
+    ):
+        self.client       = genai.Client(api_key=api_key)
+        self.models       = models or GEMINI_MODELS
+        self.summary_file = Path(summary_file)
+        self.qa_file      = Path(qa_file)
+    def _call_api(self, transcript: str) -> tuple[str, str]:
+        """
+        Try each model in order.
+        Per model: retry up to MAX_RETRIES on transient errors with backoff.
+        Returns (response_text, model_used).
+        """
+        overall_last_error = None
+        for model in self.models:
+            logger.info("── Trying model: %s", model)
+            wait     = self.BASE_WAIT
+            last_err = None
+            for attempt in range(1, self.MAX_RETRIES + 1):
+                try:
+                    logger.info("   [%d/%d] Sending request...", attempt, self.MAX_RETRIES)
+                    response = self.client.models.generate_content(
+                        model=model,
+                        contents=transcript,
+                        config={"system_instruction": SYSTEM_PROMPT},
+                    )
+                    logger.info(
+                        "✅ Response received from: %s (attempt %d)",
+                        model, attempt,
+                    )
+                    return response.text, model
+                except Exception as e:
+                    err      = str(e)
+                    last_err = e
+                    if any(k in err for k in self.SKIP_TO_NEXT):
+                        logger.warning(
+                            "   [%d/%d] %s — quota/not-found, skipping to next model.",
+                            attempt, self.MAX_RETRIES, model,
+                        )
+                        break  # skip to next model
+                    elif any(k in err for k in self.RETRYABLE):
+                        if attempt < self.MAX_RETRIES:
+                            logger.warning(
+                                "   [%d/%d] %s — transient error. "
+                                "Retrying in %ds...",
+                                attempt, self.MAX_RETRIES, model, wait,
+                            )
+                            time.sleep(wait)
+                            wait = min(wait * 2, self.MAX_WAIT)
+                        else:
+                            logger.warning(
+                                "   [%d/%d] %s — max retries reached, "
+                                "trying next model.",
+                                attempt, self.MAX_RETRIES, model,
+                            )
+                    else:
+                        logger.error(
+                            "   [%d/%d] %s — unhandled error: %s",
+                            attempt, self.MAX_RETRIES, model, err,
+                        )
+                        raise
+            overall_last_error = last_err
+        raise RuntimeError(
+            f"All models and retries exhausted. Last error: {overall_last_error}"
+        )
+    @staticmethod
+    def _split(full_text: str) -> tuple[str, str]:
+        for pattern in (r"^\s*!{5}\s*$", r"^\s*!{3}\s*$"):
+            parts = re.split(pattern, full_text, flags=re.MULTILINE)
+            if len(parts) >= 2:
+                return parts[0].strip(), "".join(parts[1:]).strip()
+        return full_text.strip(), ""
+    def run(self, transcript: str) -> tuple[str, str, str]:
+        full, model_used = self._call_api(transcript)
+        summary, qa      = self._split(full)
+        self.summary_file.write_text(summary, encoding="utf-8")
+        self.qa_file.write_text(qa,           encoding="utf-8")
+        logger.info("Summary saved → %s", self.summary_file)
+        logger.info("Q&A saved    → %s", self.qa_file)
+        return summary, qa, model_used
+# ============================================================================
+# PIPELINE
+# ============================================================================
+class TranscriptSummaryPipeline:
+    def __init__(
+        self,
+        youtube_url: str,
+        languages: Optional[List[str]] = None,
+        polling_config: dict           = None,
+    ):
+        self.youtube_url = youtube_url
+        self.fetcher     = YouTubeTranscriptFetcher(
+            youtube_url=youtube_url,
+            output_file=TRANSCRIPT_FILE,
+            languages=languages,
+            polling_config=polling_config,
+        )
+        self.summarizer  = GeminiSummarizer()
+        self.video_id    = self.fetcher.video_id
+        self.video_title = fetch_video_title(self.video_id)
+    def run(self) -> dict:
+        logger.info("=== Pipeline started ===")
+        logger.info("Video title : %s", self.video_title)
+        transcript         = self.fetcher.run()
+        summary, qa, model = self.summarizer.run(transcript)
+        logger.info("=== Pipeline complete | model: %s ===", model)
+        return {
+            "video_id":    self.video_id,
+            "video_title": self.video_title,
+            "model_used":  model,
+            "summary":     summary,
+            "qa":          qa,
+            "transcript":  transcript,
+        }
+# ============================================================================
+# CLI
+# ============================================================================
+def main():
+    if len(sys.argv) < 2:
+        print("Usage: python gemini.py <youtube_url>", file=sys.stderr)
+        sys.exit(1)
+    pipeline = TranscriptSummaryPipeline(
+        youtube_url=sys.argv[1],
+        languages=["en", "en-US", "en-GB"],
+    )
+    result = pipeline.run()
+    for key, value in result.items():
+        if key not in ("summary", "qa", "transcript"):
+            print(f"{key}: {value}")
+if __name__ == "__main__":
+    main()

index.html ADDED Viewed

	@@ -0,0 +1,378 @@

+<!DOCTYPE html>
+<html lang="en">
+<head>
+  <meta charset="UTF-8" />
+  <meta name="viewport" content="width=device-width, initial-scale=1.0" />
+  <title>YT Summariser</title>
+  <link rel="stylesheet" href="https://cdn.jsdelivr.net/npm/@tabler/icons-webfont@latest/tabler-icons.min.css" />
+  <style>
+    *, *::before, *::after { box-sizing: border-box; margin: 0; padding: 0; }
+    body { font-family: system-ui, sans-serif; background: #f9f9f8; color: #1a1a18; min-height: 100vh; display: flex; align-items: center; justify-content: center; }
+    .wrap { width: 100%; max-width: 460px; padding: 2.5rem 1.5rem; }
+    /* ── Logo ── */
+    .logo { display: flex; align-items: center; gap: 10px; margin-bottom: 2.5rem; }
+    .logo-icon { width: 36px; height: 36px; background: #E24B4A; border-radius: 8px; display: flex; align-items: center; justify-content: center; }
+    .logo-icon i { color: #fff; font-size: 18px; }
+    .logo-text { font-size: 15px; font-weight: 600; }
+    .logo-sub  { font-size: 12px; color: #888; }
+    /* ── Auth banner ── */
+    .auth-banner {
+      display: flex; align-items: center; justify-content: space-between;
+      padding: 10px 14px; border-radius: 8px; margin-bottom: 1.5rem;
+      font-size: 13px; border: 1px solid #ddd; background: #fff;
+    }
+    .auth-banner.ok  { border-color: #86efac; background: #f0fdf4; }
+    .auth-banner.bad { border-color: #fca5a5; background: #fff5f5; }
+    .auth-left { display: flex; align-items: center; gap: 8px; }
+    .auth-dot  { width: 8px; height: 8px; border-radius: 50%; flex-shrink: 0; }
+    .auth-dot.ok  { background: #22c55e; }
+    .auth-dot.bad { background: #ef4444; }
+    .auth-text { color: #444; }
+    .auth-btn {
+      font-size: 12px; font-weight: 600; padding: 5px 12px;
+      border-radius: 6px; border: none; cursor: pointer;
+      background: #1a1a18; color: #fff; text-decoration: none;
+      display: inline-flex; align-items: center; gap: 5px;
+      transition: opacity .15s;
+    }
+    .auth-btn:hover { opacity: .8; }
+    .auth-btn.connected { background: #dcfce7; color: #166534; cursor: default; }
+    /* ── Heading ── */
+    h1 { font-size: 22px; font-weight: 600; margin-bottom: 6px; }
+    .subtitle { font-size: 14px; color: #666; margin-bottom: 2rem; line-height: 1.6; }
+    /* ── Form ── */
+    .field { margin-bottom: 1rem; }
+    label  { display: block; font-size: 13px; color: #555; margin-bottom: 5px; }
+    input[type=text], input[type=email] {
+      width: 100%; padding: 9px 12px; font-size: 14px;
+      border: 1px solid #ddd; border-radius: 8px; background: #fff;
+      outline: none; transition: border-color .15s;
+    }
+    input:focus { border-color: #E24B4A; }
+    /* ── Buttons ── */
+    .btn {
+      width: 100%; padding: 10px; font-size: 14px; font-weight: 600;
+      cursor: pointer; border-radius: 8px; border: none;
+      background: #E24B4A; color: #fff; margin-top: .5rem;
+      display: flex; align-items: center; justify-content: center; gap: 8px;
+      transition: opacity .15s;
+    }
+    .btn:hover    { opacity: .88; }
+    .btn:disabled { opacity: .5; cursor: not-allowed; }
+    /* ── Error ── */
+    .err {
+      font-size: 13px; color: #a32d2d; margin-top: 1rem; display: none;
+      padding: 10px 12px; border: 1px solid #f09595; border-radius: 8px;
+      background: #fcebeb;
+    }
+    /* ── Status box ── */
+    /* Job progress panel. Starts hidden; startPolling() sets display:block once a job is submitted. */
+    .status-box {
+      margin-top: 1.5rem; border: 1px solid #e5e5e3;
+      border-radius: 12px; overflow: hidden; display: none; background: #fff;
+    }
+    /* Header row of the panel: space-between pushes the label and the badge to opposite ends. */
+    .status-header {
+      padding: 12px 16px; display: flex; align-items: center;
+      justify-content: space-between; border-bottom: 1px solid #e5e5e3;
+    }
+    .status-label { font-size: 13px; font-weight: 600; }
+    /* Pill-shaped state badge; updateBadge() adds exactly one of the colour variants below. */
+    .badge { font-size: 11px; padding: 3px 8px; border-radius: 20px; font-weight: 600; }
+    .badge-running { background: #dbeafe; color: #1e40af; }
+    .badge-done    { background: #dcfce7; color: #166534; }
+    .badge-failed  { background: #fee2e2; color: #991b1b; }
+    /* ── Steps ── */
+    /* Vertical list with one row per pipeline step; the rows are generated by renderSteps(). */
+    .steps { padding: 12px 16px; display: flex; flex-direction: column; gap: 9px; }
+    /* Rows are grey by default; renderSteps() adds .active for every state other than "pending". */
+    .step  { display: flex; align-items: center; gap: 10px; font-size: 13px; color: #999; }
+    .step.active { color: #1a1a18; }
+    /* Round state marker at the start of each row; flex-shrink:0 keeps it at 20px when the row is tight. */
+    .step-icon {
+      width: 20px; height: 20px; border-radius: 50%;
+      display: flex; align-items: center; justify-content: center;
+      flex-shrink: 0; font-size: 11px; border: 1px solid #ddd; background: #f5f5f4;
+    }
+    /* State variants: renderSteps() appends the step's state string as a class on .step-icon. */
+    .step-icon.done    { background: #dcfce7; border-color: #86efac; color: #166534; }
+    .step-icon.running { background: #dbeafe; border-color: #93c5fd; color: #1e40af; }
+    .step-icon.failed  { background: #fee2e2; border-color: #fca5a5; color: #991b1b; }
+    /* ── Result links ── */
+    /* Hidden until showResult() sets display:flex; flex-direction and gap take effect from then on. */
+    .result-box {
+      padding: 12px 16px; border-top: 1px solid #e5e5e3;
+      display: none; flex-direction: column; gap: 8px;
+    }
+    .result-link { display: flex; align-items: center; gap: 8px; font-size: 13px; }
+    .result-link a { color: #1e40af; text-decoration: none; }
+    .result-link a:hover { text-decoration: underline; }
+    .result-note { font-size: 12px; color: #888; margin-top: 2px; }
+    /* ── Spinner ── */
+    /* Rotating ring: a border in the current text colour with a transparent top edge, spun by "spin". */
+    .spinner {
+      width: 11px; height: 11px; border: 1.5px solid currentColor;
+      border-top-color: transparent; border-radius: 50%;
+      animation: spin .7s linear infinite; display: inline-block;
+    }
+    /* One full turn per animation cycle. */
+    @keyframes spin { to { transform: rotate(360deg); } }
+  </style>
+</head>
+<body>
+<div class="wrap">
+  <!-- Logo -->
+  <div class="logo">
+    <div class="logo-icon"><i class="ti ti-brand-youtube"></i></div>
+    <div>
+      <div class="logo-text">YT Summariser</div>
+      <div class="logo-sub">Transcript → Summary → Q&amp;A</div>
+    </div>
+  </div>
+  <!-- Auth banner: rendered in the "not connected" state; setAuthBanner() rewrites the
+       classes, text and link once /auth/status has been checked. -->
+  <div class="auth-banner bad" id="auth-banner">
+    <div class="auth-left">
+      <div class="auth-dot bad" id="auth-dot"></div>
+      <span class="auth-text" id="auth-text">Google not connected</span>
+    </div>
+    <a href="/auth/start" class="auth-btn" id="auth-btn" target="_blank" onclick="onAuthClick()">
+      <i class="ti ti-brand-google"></i> Connect Google
+    </a>
+  </div>
+  <!-- Heading -->
+  <h1>Summarise a YouTube video</h1>
+  <p class="subtitle">Paste a YouTube link and your email — we'll process the transcript and send results to your inbox.</p>
+  <!-- Form: plain fields read by submitJob(); there is no <form> element, so nothing is
+       submitted natively. -->
+  <div class="field">
+    <label for="yt-url">YouTube URL</label>
+    <input type="text" id="yt-url" placeholder="https://www.youtube.com/watch?v=..." />
+  </div>
+  <div class="field">
+    <label for="email">Email address</label>
+    <input type="email" id="email" placeholder="you@example.com" />
+  </div>
+  <!-- type="button" keeps this a plain button even if the markup is later wrapped in a <form>. -->
+  <button type="button" class="btn" id="submit-btn" onclick="submitJob()">
+    <i class="ti ti-player-play"></i> Start processing
+  </button>
+  <!-- Error box filled by showErr(); role="alert" lets assistive technology announce the
+       message when it appears. -->
+  <div class="err" id="err-box" role="alert"></div>
+  <!-- Status box: shown by startPolling(); the label, badge, steps and result links are
+       filled in by poll(), updateBadge(), renderSteps() and showResult(). -->
+  <div class="status-box" id="status-box">
+    <div class="status-header">
+      <span class="status-label" id="status-label">Processing…</span>
+      <span class="badge badge-running" id="status-badge">Running</span>
+    </div>
+    <div class="steps" id="steps-list"></div>
+    <div class="result-box" id="result-box"></div>
+  </div>
+</div>
+<script>
+  /* ── Step config ── */
+  const STEP_LABELS = {
+    fetch_transcript:    'Fetching transcript',
+    summarize:           'Summarising with Gemini',
+    create_drive_folder: 'Creating Drive folder',
+    upload_summary:      'Uploading summary',
+    upload_qa:           'Uploading Q&A',
+    upload_transcript:   'Uploading transcript',
+    send_email:          'Sending email',
+    log_sheet:           'Logging to Sheets',
+  };
+  const STEP_ICONS = {
+    fetch_transcript:    'ti-file-text',
+    summarize:           'ti-brain',
+    create_drive_folder: 'ti-folder-plus',
+    upload_summary:      'ti-upload',
+    upload_qa:           'ti-upload',
+    upload_transcript:   'ti-upload',
+    send_email:          'ti-mail',
+    log_sheet:           'ti-table',
+  };
+  let pollTimer = null;
+  /* ── Auth status check ── */
+  async function checkAuth() {
+    try {
+      const res  = await fetch('/auth/status');
+      const data = await res.json();
+      setAuthBanner(data.authenticated);
+    } catch (e) {
+      setAuthBanner(false);
+    }
+  }
+  function setAuthBanner(ok) {
+    const banner = document.getElementById('auth-banner');
+    const dot    = document.getElementById('auth-dot');
+    const text   = document.getElementById('auth-text');
+    const btn    = document.getElementById('auth-btn');
+    if (ok) {
+      banner.className = 'auth-banner ok';
+      dot.className    = 'auth-dot ok';
+      text.textContent = 'Google configured';
+      btn.className    = 'auth-btn connected';
+      btn.innerHTML    = '<i class="ti ti-circle-check"></i> Connected';
+      btn.removeAttribute('href');
+      btn.onclick      = null;
+    } else {
+      banner.className = 'auth-banner bad';
+      dot.className    = 'auth-dot bad';
+      text.textContent = 'Google not connected';
+      btn.className    = 'auth-btn';
+      btn.innerHTML    = '<i class="ti ti-brand-google"></i> Connect Google';
+      btn.href         = '/auth/start';
+      btn.onclick      = onAuthClick;
+    }
+  }
+  function onAuthClick() {
+    // After OAuth window closes, recheck
+    setTimeout(() => {
+      const check = setInterval(async () => {
+        const res  = await fetch('/auth/status');
+        const data = await res.json();
+        if (data.authenticated) { clearInterval(check); setAuthBanner(true); }
+      }, 2000);
+      // Stop checking after 2 min
+      setTimeout(() => clearInterval(check), 120000);
+    }, 3000);
+  }
+  /* ── Helpers ── */
+  function showErr(msg) {
+    const b = document.getElementById('err-box');
+    b.textContent = msg; b.style.display = 'block';
+  }
+  function clearErr() { document.getElementById('err-box').style.display = 'none'; }
+  /* ── Submit ── */
+  async function submitJob() {
+    clearErr();
+    const url   = document.getElementById('yt-url').value.trim();
+    const email = document.getElementById('email').value.trim();
+    if (!url)   { showErr('Please enter a YouTube URL.');    return; }
+    if (!email) { showErr('Please enter an email address.'); return; }
+    const btn = document.getElementById('submit-btn');
+    btn.disabled = true;
+    btn.innerHTML = '<span class="spinner"></span> Starting…';
+    try {
+      const res  = await fetch('/generate', {
+        method: 'POST',
+        headers: { 'Content-Type': 'application/json' },
+        body: JSON.stringify({ youtube_url: url, email_to: email }),
+      });
+      const data = await res.json();
+      if (!res.ok) {
+        showErr(data.detail || 'Request failed.');
+        btn.disabled = false;
+        btn.innerHTML = '<i class="ti ti-player-play"></i> Start processing';
+        return;
+      }
+      startPolling(data.job_id);
+    } catch (e) {
+      showErr('Could not reach the server. Is it running on localhost:8000?');
+      btn.disabled = false;
+      btn.innerHTML = '<i class="ti ti-player-play"></i> Start processing';
+    }
+  }
+  /* ── Polling ── */
+  function startPolling(jobId) {
+    document.getElementById('status-box').style.display = 'block';
+    renderSteps({});
+    pollTimer = setInterval(() => poll(jobId), 2500);
+  }
+  async function poll(jobId) {
+    try {
+      const res  = await fetch('/status/' + jobId);
+      const data = await res.json();
+      renderSteps(data.steps || {});
+      updateBadge(data.status);
+      const running = Object.entries(data.steps || {}).find(([, v]) => v === 'running');
+      if (running) document.getElementById('status-label').textContent = STEP_LABELS[running[0]] + '…';
+      if (data.status === 'completed') {
+        clearInterval(pollTimer);
+        document.getElementById('status-label').textContent = 'Done!';
+        showResult(data.result);
+        const btn = document.getElementById('submit-btn');
+        btn.disabled = false;
+        btn.innerHTML = '<i class="ti ti-player-play"></i> Process another';
+      } else if (data.status === 'failed') {
+        clearInterval(pollTimer);
+        document.getElementById('status-label').textContent = 'Failed';
+        showErr('Pipeline failed: ' + (data.error || 'unknown error'));
+        const btn = document.getElementById('submit-btn');
+        btn.disabled = false;
+        btn.innerHTML = '<i class="ti ti-player-play"></i> Try again';
+      }
+    } catch (e) {}
+  }
+  /* ── Render steps ── */
+  function renderSteps(steps) {
+    const list = document.getElementById('steps-list');
+    list.innerHTML = Object.entries(STEP_LABELS).map(([key, label]) => {
+      const state   = steps[key] || 'pending';
+      const iconCls = STEP_ICONS[key] || 'ti-circle';
+      let inner = '';
+      if      (state === 'done')    inner = '<i class="ti ti-check"></i>';
+      else if (state === 'running') inner = '<span class="spinner"></span>';
+      else if (state === 'failed')  inner = '<i class="ti ti-x"></i>';
+      return `<div class="step ${state !== 'pending' ? 'active' : ''}">
+        <div class="step-icon ${state}">${inner}</div>
+        <i class="ti ${iconCls}" style="font-size:14px;color:#aaa"></i>
+        <span>${label}</span>
+      </div>`;
+    }).join('');
+  }
+  function updateBadge(status) {
+    const b = document.getElementById('status-badge');
+    b.className = 'badge';
+    if      (status === 'completed') { b.classList.add('badge-done');    b.textContent = 'Completed'; }
+    else if (status === 'failed')    { b.classList.add('badge-failed');  b.textContent = 'Failed'; }
+    else                             { b.classList.add('badge-running'); b.textContent = 'Running'; }
+  }
+  /* ── Show result links ── */
+  function showResult(result) {
+    if (!result) return;
+    const box   = document.getElementById('result-box');
+    box.style.display = 'flex';
+    const drive = result.drive || {};
+    const links = [
+      ['ti-file-text',   'Summary',    drive.summary?.web_view_link],
+      ['ti-help-circle', 'Q&A',        drive.qa?.web_view_link],
+      ['ti-align-left',  'Transcript', drive.transcript?.web_view_link],
+    ];
+    box.innerHTML = links
+      .filter(([,, u]) => u)
+      .map(([icon, label, u]) => `
+        <div class="result-link">
+          <i class="ti ${icon}" style="font-size:15px;color:#888"></i>
+          <a href="${u}" target="_blank">${label} <i class="ti ti-external-link" style="font-size:11px"></i></a>
+        </div>`)
+      .join('') +
+      `<p class="result-note"><i class="ti ti-mail" style="font-size:13px;vertical-align:-2px"></i> Results also sent to your email</p>`;
+  }
+  /* ── Init ── */
+  checkAuth();
+</script>
+</body>
+</html>

requirements.txt ADDED Viewed

	@@ -0,0 +1,7 @@

+fastapi
+uvicorn[standard]
+google-api-python-client
+google-auth-httplib2
+google-auth-oauthlib
+requests
+youtube_transcript_api