rsnarsna committed on
Commit
dda4ec3
·
0 Parent(s):

first commit

Browse files
Files changed (6) hide show
  1. .gitignore +220 -0
  2. client_secret.json +12 -0
  3. fastapi_app.py +1138 -0
  4. gemini_transcript.py +445 -0
  5. index.html +378 -0
  6. requirements.txt +7 -0
.gitignore ADDED
@@ -0,0 +1,220 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Byte-compiled / optimized / DLL files
2
+ __pycache__/
3
+ *.py[codz]
4
+ *$py.class
5
+
6
+ # C extensions
7
+ *.so
8
+
9
+ # Distribution / packaging
10
+ .Python
11
+ build/
12
+ develop-eggs/
13
+ dist/
14
+ downloads/
15
+ eggs/
16
+ .eggs/
17
+ lib/
18
+ lib64/
19
+ parts/
20
+ sdist/
21
+ var/
22
+ wheels/
23
+ share/python-wheels/
24
+ *.egg-info/
25
+ .installed.cfg
26
+ *.egg
27
+ MANIFEST
28
+
29
+ # PyInstaller
30
+ # Usually these files are written by a python script from a template
31
+ # before PyInstaller builds the exe, so as to inject date/other infos into it.
32
+ *.manifest
33
+ *.spec
34
+
35
+ # Installer logs
36
+ pip-log.txt
37
+ pip-delete-this-directory.txt
38
+
39
+ # Unit test / coverage reports
40
+ htmlcov/
41
+ .tox/
42
+ .nox/
43
+ .coverage
44
+ .coverage.*
45
+ .cache
46
+ nosetests.xml
47
+ coverage.xml
48
+ *.cover
49
+ *.py.cover
50
+ *.lcov
51
+ .hypothesis/
52
+ .pytest_cache/
53
+ cover/
54
+
55
+ # Translations
56
+ *.mo
57
+ *.pot
58
+
59
+ # Django stuff:
60
+ *.log
61
+ local_settings.py
62
+ db.sqlite3
63
+ db.sqlite3-journal
64
+
65
+ # Flask stuff:
66
+ instance/
67
+ .webassets-cache
68
+
69
+ # Scrapy stuff:
70
+ .scrapy
71
+
72
+ # Sphinx documentation
73
+ docs/_build/
74
+
75
+ # PyBuilder
76
+ .pybuilder/
77
+ target/
78
+
79
+ # Jupyter Notebook
80
+ .ipynb_checkpoints
81
+
82
+ # IPython
83
+ profile_default/
84
+ ipython_config.py
85
+
86
+ # pyenv
87
+ # For a library or package, you might want to ignore these files since the code is
88
+ # intended to run in multiple environments; otherwise, check them in:
89
+ # .python-version
90
+
91
+ # pipenv
92
+ # According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control.
93
+ # However, in case of collaboration, if having platform-specific dependencies or dependencies
94
+ # having no cross-platform support, pipenv may install dependencies that don't work, or not
95
+ # install all needed dependencies.
96
+ # Pipfile.lock
97
+
98
+ # UV
99
+ # Similar to Pipfile.lock, it is generally recommended to include uv.lock in version control.
100
+ # This is especially recommended for binary packages to ensure reproducibility, and is more
101
+ # commonly ignored for libraries.
102
+ # uv.lock
103
+
104
+ # poetry
105
+ # Similar to Pipfile.lock, it is generally recommended to include poetry.lock in version control.
106
+ # This is especially recommended for binary packages to ensure reproducibility, and is more
107
+ # commonly ignored for libraries.
108
+ # https://python-poetry.org/docs/basic-usage/#commit-your-poetrylock-file-to-version-control
109
+ # poetry.lock
110
+ # poetry.toml
111
+
112
+ # pdm
113
+ # Similar to Pipfile.lock, it is generally recommended to include pdm.lock in version control.
114
+ # pdm recommends including project-wide configuration in pdm.toml, but excluding .pdm-python.
115
+ # https://pdm-project.org/en/latest/usage/project/#working-with-version-control
116
+ # pdm.lock
117
+ # pdm.toml
118
+ .pdm-python
119
+ .pdm-build/
120
+
121
+ # pixi
122
+ # Similar to Pipfile.lock, it is generally recommended to include pixi.lock in version control.
123
+ # pixi.lock
124
+ # Pixi creates a virtual environment in the .pixi directory, just like venv module creates one
125
+ # in the .venv directory. It is recommended not to include this directory in version control.
126
+ .pixi/*
127
+ !.pixi/config.toml
128
+
129
+ # PEP 582; used by e.g. github.com/David-OConnor/pyflow and github.com/pdm-project/pdm
130
+ __pypackages__/
131
+
132
+ # Celery stuff
133
+ celerybeat-schedule*
134
+ celerybeat.pid
135
+
136
+ # Redis
137
+ *.rdb
138
+ *.aof
139
+ *.pid
140
+
141
+ # RabbitMQ
142
+ mnesia/
143
+ rabbitmq/
144
+ rabbitmq-data/
145
+
146
+ # ActiveMQ
147
+ activemq-data/
148
+
149
+ # SageMath parsed files
150
+ *.sage.py
151
+
152
+ # Environments
153
+ .env
154
+ .envrc
155
+ .venv
156
+ env/
157
+ venv/
158
+ ENV/
159
+ env.bak/
160
+ venv.bak/
161
+
162
+ # Spyder project settings
163
+ .spyderproject
164
+ .spyproject
165
+
166
+ # Rope project settings
167
+ .ropeproject
168
+
169
+ # mkdocs documentation
170
+ /site
171
+
172
+ # mypy
173
+ .mypy_cache/
174
+ .dmypy.json
175
+ dmypy.json
176
+
177
+ # Pyre type checker
178
+ .pyre/
179
+
180
+ # pytype static type analyzer
181
+ .pytype/
182
+
183
+ # Cython debug symbols
184
+ cython_debug/
185
+
186
+ # PyCharm
187
+ # JetBrains specific template is maintained in a separate JetBrains.gitignore that can
188
+ # be found at https://github.com/github/gitignore/blob/main/Global/JetBrains.gitignore
189
+ # and can be added to the global gitignore or merged into this file. For a more nuclear
190
+ # option (not recommended) you can uncomment the following to ignore the entire idea folder.
191
+ # .idea/
192
+
193
+ # Abstra
194
+ # Abstra is an AI-powered process automation framework.
195
+ # Ignore directories containing user credentials, local state, and settings.
196
+ # Learn more at https://abstra.io/docs
197
+ .abstra/
198
+
199
+ # Visual Studio Code
200
+ # Visual Studio Code specific template is maintained in a separate VisualStudioCode.gitignore
201
+ # that can be found at https://github.com/github/gitignore/blob/main/Global/VisualStudioCode.gitignore
202
+ # and can be added to the global gitignore or merged into this file. However, if you prefer,
203
+ # you could uncomment the following to ignore the entire vscode folder
204
+ # .vscode/
205
+ # Temporary file for partial code execution
206
+ tempCodeRunnerFile.py
207
+
208
+ # Ruff stuff:
209
+ .ruff_cache/
210
+
211
+ # PyPI configuration file
212
+ .pypirc
213
+
214
+ # Marimo
215
+ marimo/_static/
216
+ marimo/_lsp/
217
+ __marimo__/
218
+
219
+ # Streamlit
220
+ .streamlit/secrets.toml
client_secret.json ADDED
@@ -0,0 +1,12 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "web": {
3
+ "client_id": "769133159215-9gbq0l5v49kmclfcq7vbq7tutck0aphd.apps.googleusercontent.com",
4
+ "project_id": "root-isotope-497908-u0",
5
+ "auth_uri": "https://accounts.google.com/o/oauth2/auth",
6
+ "token_uri": "https://oauth2.googleapis.com/token",
7
+ "auth_provider_x509_cert_url": "https://www.googleapis.com/oauth2/v1/certs",
8
+ "client_secret": "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",
9
+ "redirect_uris": ["http://localhost:8000/auth/callback"],
10
+ "javascript_origins": ["http://localhost:8000"]
11
+ }
12
+ }
fastapi_app.py ADDED
@@ -0,0 +1,1138 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import os
2
+ import json
3
+ import base64
4
+ import hashlib
5
+ import secrets
6
+ import shutil
7
+ import tempfile
8
+ import threading
9
+
10
+ from pathlib import Path
11
+ from email.mime.text import MIMEText
12
+ from datetime import datetime, timezone
13
+
14
+ from fastapi.responses import FileResponse
15
+ from fastapi import FastAPI, Request, HTTPException
16
+ from fastapi.responses import RedirectResponse, HTMLResponse
17
+ from pydantic import BaseModel
18
+
19
+ from google_auth_oauthlib.flow import Flow
20
+ from google.auth.transport.requests import Request as GoogleRequest
21
+ from google.oauth2.credentials import Credentials
22
+ from googleapiclient.discovery import build
23
+ from googleapiclient.http import MediaFileUpload
24
+
25
+ from gemini_transcript import TranscriptSummaryPipeline
26
+
27
+
28
+ # ============================================================================
29
+ # CONFIG
30
+ # ============================================================================
31
+
32
# Tell oauthlib to accept plain http:// during the OAuth flow (the default
# redirect URI below is an http://localhost address). setdefault() keeps any
# value that is already present in the environment.
# NOTE(review): this default is applied on every start, not only in local
# development — confirm it is overridden for deployments served over HTTPS.
os.environ.setdefault("OAUTHLIB_INSECURE_TRANSPORT", "1")

# Directory that contains this module; used as the base for the default paths.
BASE_DIR = Path(__file__).resolve().parent
# Each setting below can be overridden through the environment variable named
# in its os.getenv() call; the second argument is the fallback value.
CLIENT_SECRETS = os.getenv("CLIENT_SECRETS", str(BASE_DIR / "client_secret.json"))
TOKEN_PATH = os.getenv("GOOGLE_OAUTH_TOKEN_PATH", str(BASE_DIR / "Google_oauth_token.json"))
REDIRECT_URI = os.getenv("REDIRECT_URI", "http://localhost:8000/auth/callback")
# JSON file mapping pending OAuth state values to their PKCE code verifiers.
STATE_FILE = BASE_DIR / "oauth_states.json"
DEFAULT_SPREADSHEET_ID = os.getenv("DEFAULT_SPREADSHEET_ID", "1XA3vW_guHBT-ktkYvhktmUqcquECBe8exGZAoSQS3Ag")
DEFAULT_DRIVE_FOLDER_ID = os.getenv("DEFAULT_DRIVE_FOLDER_ID", "1hI6dNXysR_2p9gHkDpsI-iwMExmy2hhR")

# OAuth scopes requested during authorization and expected on the stored token.
SCOPES = [
    "https://www.googleapis.com/auth/spreadsheets",
    "https://www.googleapis.com/auth/gmail.send",
    "https://www.googleapis.com/auth/drive.file",
]

# Output directory, created at import time if it does not exist yet.
OUTPUT_DIR = BASE_DIR / "output"
OUTPUT_DIR.mkdir(exist_ok=True)

# NOTE(review): these are fixed file names inside OUTPUT_DIR; their readers and
# writers are outside the visible part of the file.
SUMMARY_FILE = OUTPUT_DIR / "summary.txt"
QA_FILE = OUTPUT_DIR / "qa.txt"
TRANSCRIPT_FILE = OUTPUT_DIR / "transcript.txt"

# Header row for the job log sheet; the trailing letters are the sheet columns.
SHEETS_HEADERS = [
    "Timestamp",  # A
    "Job ID",  # B
    "Video Title",  # C
    "YouTube URL",  # D
    "Model Used",  # E
    "Status",  # F
    "Summary Drive Link",  # G
    "Q&A Drive Link",  # H
    "Transcript Drive Link",  # I
    "Email Sent To",  # J
    "Email Status",  # K
    "Email Message ID",  # L
    "Completed At",  # M
    "Error",  # N
]
71
+
72
+
73
+ # ============================================================================
74
+ # IN-MEMORY JOB STORE
75
+ # ============================================================================
76
+
77
# Job records keyed by job ID; held only in this process's memory.
_jobs: dict[str, dict] = {}
# Lock taken by _new_job, _update_job and _set_step around every access to _jobs.
_jobs_lock = threading.Lock()

# Step names tracked per job; _new_job marks each of them "pending".
STEPS = [
    "fetch_transcript",
    "summarize",
    "create_drive_folder",
    "upload_summary",
    "upload_qa",
    "upload_transcript",
    "send_email",
    "log_sheet",
]
90
+
91
+
92
def _new_job(job_id: str, youtube_url: str, email_to: str) -> dict:
    """Create a fresh job record, store it in ``_jobs`` and return it.

    The record starts with status ``"initiated"``, a UTC start timestamp, and
    every entry of ``STEPS`` set to ``"pending"``. An existing record with the
    same ``job_id`` is replaced.
    """
    started = datetime.now(timezone.utc).strftime("%Y-%m-%d %H:%M:%S UTC")
    record = dict(
        job_id=job_id,
        status="initiated",
        youtube_url=youtube_url,
        email_to=email_to,
        started_at=started,
        completed_at=None,
        steps=dict.fromkeys(STEPS, "pending"),
        result=None,
        error=None,
    )
    _jobs_lock.acquire()
    try:
        _jobs[job_id] = record
    finally:
        _jobs_lock.release()
    return record
107
+
108
+
109
def _update_job(job_id: str, **kwargs):
    """Merge ``kwargs`` into the stored record for ``job_id``.

    Does nothing when no record with that ID exists.
    """
    with _jobs_lock:
        if job_id not in _jobs:
            return
        record = _jobs[job_id]
        record.update(kwargs)
113
+
114
+
115
def _set_step(job_id: str, step: str, state: str):
    """Set the state of one step on the stored record for ``job_id``.

    Does nothing when no record with that ID exists.
    """
    with _jobs_lock:
        if job_id not in _jobs:
            return
        step_states = _jobs[job_id]["steps"]
        step_states[step] = state
119
+
120
+
121
+ # ============================================================================
122
+ # APP
123
+ # ============================================================================
124
+
125
# The FastAPI application object that all route decorators below attach to.
app = FastAPI(title="Google Integration API", version="7.0.0")
126
+
127
+
128
+ # ============================================================================
129
+ # MODELS
130
+ # ============================================================================
131
+
132
# Request body carrying a YouTube URL and a recipient e-mail address.
# NOTE(review): the route that consumes this model is outside the visible part
# of the file. Plain comments are used because a class docstring would be
# picked up as the model's schema description.
class GenerateRequest(BaseModel):
    youtube_url: str
    email_to: str
135
+
136
# Request body for POST /email: recipient, subject line and plain-text body.
class EmailRequest(BaseModel):
    to: str
    subject: str
    body: str
140
+
141
# Request body for POST /drive/create.
class CreateFileRequest(BaseModel):
    filename: str  # name given to the file on Drive
    content: str  # text written into the file
    mimetype: str = "text/plain"
    folder_id: str | None = None  # parent folder; no parent is set when omitted
    make_public: bool = True  # grant "anyone: reader" after upload
147
+
148
# Request body with a target spreadsheet, an A1-notation range and row values.
# NOTE(review): the routes that consume this model are outside the visible part
# of the file.
class SheetWriteRequest(BaseModel):
    spreadsheet_id: str = DEFAULT_SPREADSHEET_ID
    range_name: str = "Sheet1!A1"
    values: list[list]  # one inner list per row
152
+
153
# Request body naming the spreadsheet and A1-notation range to clear.
# NOTE(review): the route that consumes this model is outside the visible part
# of the file.
class SheetClearRequest(BaseModel):
    spreadsheet_id: str = DEFAULT_SPREADSHEET_ID
    range_name: str = "Sheet1!A1:Z1000"
156
+
157
+
158
+ # ============================================================================
159
+ # STATE PERSISTENCE
160
+ # ============================================================================
161
+
162
def load_states() -> dict:
    """Load the pending OAuth states from ``STATE_FILE``.

    Returns:
        The mapping stored in the file, or an empty dict when the file is
        missing, unreadable, not valid JSON, or does not hold a JSON object.
    """
    try:
        loaded = json.loads(STATE_FILE.read_text())
    except (OSError, ValueError):
        # OSError: file missing or unreadable. ValueError: bad JSON or bad encoding.
        return {}
    # Callers index and mutate the result as a dict, so anything else (a list,
    # a string, null) is treated as "no saved states".
    return loaded if isinstance(loaded, dict) else {}
167
+
168
+
169
def save_states(states: dict) -> None:
    """Serialize ``states`` as JSON and overwrite ``STATE_FILE`` with it."""
    serialized = json.dumps(states)
    STATE_FILE.write_text(serialized)
171
+
172
+
173
+ # ============================================================================
174
+ # AUTH
175
+ # ============================================================================
176
+
177
def create_flow() -> Flow:
    """Build an OAuth ``Flow`` from the client-secrets file.

    Uses the module-level ``SCOPES`` and ``REDIRECT_URI``.

    Raises:
        FileNotFoundError: if ``CLIENT_SECRETS`` does not exist on disk.
    """
    secrets_present = os.path.exists(CLIENT_SECRETS)
    if secrets_present:
        return Flow.from_client_secrets_file(
            CLIENT_SECRETS,
            scopes=SCOPES,
            redirect_uri=REDIRECT_URI,
        )
    raise FileNotFoundError(f"Missing client secret: {CLIENT_SECRETS}")
183
+
184
+
185
def load_credentials() -> Credentials | None:
    """Load the stored Google credentials, refreshing them when expired.

    Returns:
        Usable credentials, or ``None`` when no token file exists, when the
        stored token is invalid and cannot be refreshed, or when Google
        rejects the refresh (for example a revoked refresh token).

    A successful refresh is written back to ``TOKEN_PATH``.
    """
    # Local import: google.auth is already a dependency of this module.
    from google.auth.exceptions import RefreshError

    if not os.path.exists(TOKEN_PATH):
        return None

    creds = Credentials.from_authorized_user_file(TOKEN_PATH, SCOPES)
    if creds.valid:
        return creds
    if not (creds.expired and creds.refresh_token):
        return None

    try:
        creds.refresh(GoogleRequest())
    except RefreshError as exc:
        # A rejected refresh means the user must authorize again. Report
        # "not authenticated" instead of letting the error crash startup
        # and every route that checks credentials.
        print(f"[WARN] Could not refresh Google credentials: {exc}")
        return None

    Path(TOKEN_PATH).write_text(creds.to_json(), encoding="utf-8")
    return creds
196
+
197
+
198
def require_credentials() -> Credentials:
    """Return the stored credentials or fail the request.

    Raises:
        HTTPException: status 401 when ``load_credentials()`` returns ``None``.
    """
    creds = load_credentials()
    if creds is not None:
        return creds
    raise HTTPException(
        status_code=401,
        detail="Not authenticated. Visit http://localhost:8000/auth/start",
    )
206
+
207
+
208
+ # ============================================================================
209
+ # GMAIL
210
+ # ============================================================================
211
+
212
+ def _raw_message(to: str, subject: str, body: str) -> dict:
213
+ msg = MIMEText(body)
214
+ msg["to"] = to
215
+ msg["subject"] = subject
216
+ return {"raw": base64.urlsafe_b64encode(msg.as_bytes()).decode()}
217
+
218
+
219
def send_email(to: str, subject: str, body: str, creds: Credentials = None) -> dict:
    """Send a plain-text e-mail through the Gmail API as the authorized user.

    Args:
        to: Recipient address.
        subject: Subject line.
        body: Plain-text message body.
        creds: Credentials to use; when ``None`` they are obtained from
            ``require_credentials()``, which raises HTTP 401 if none are stored.

    Returns:
        The response of the Gmail ``users.messages.send`` call.
    """
    if creds is None:
        creds = require_credentials()
    gmail = build("gmail", "v1", credentials=creds)
    payload = _raw_message(to, subject, body)
    request = gmail.users().messages().send(userId="me", body=payload)
    return request.execute()
228
+
229
+
230
+ # ============================================================================
231
+ # DRIVE
232
+ # ============================================================================
233
+
234
+ def _direct_link(file_id: str) -> str:
235
+ return f"https://drive.google.com/uc?export=download&id={file_id}"
236
+
237
+
238
def _make_public(service, file_id: str) -> dict:
    """Grant ``anyone`` read access to a Drive file and return its metadata.

    Args:
        service: A Drive API service object (as returned by ``_drive``).
        file_id: ID of the file to share.

    Returns:
        The file's metadata, fetched after the permission has been created.
    """
    public_reader = {"type": "anyone", "role": "reader"}
    service.permissions().create(fileId=file_id, body=public_reader).execute()

    wanted_fields = "id,name,webViewLink,webContentLink,mimeType,size,createdTime,modifiedTime"
    lookup = service.files().get(fileId=file_id, fields=wanted_fields)
    return lookup.execute()
247
+
248
+
249
def _drive(creds: Credentials):
    """Build a Drive v3 API service object authorized with ``creds``."""
    drive_service = build("drive", "v3", credentials=creds)
    return drive_service
251
+
252
+
253
def create_drive_folder(
    folder_name: str,
    parent_folder_id: str | None = None,
    creds: Credentials = None,
) -> str:
    """Create a folder on Drive and return its ID.

    Args:
        folder_name: Name of the new folder.
        parent_folder_id: Parent folder; falls back to
            ``DEFAULT_DRIVE_FOLDER_ID`` when empty. If both are empty the
            request carries no parent.
        creds: Credentials to use; obtained from ``require_credentials()``
            when ``None``.
    """
    if creds is None:
        creds = require_credentials()

    parent = parent_folder_id or DEFAULT_DRIVE_FOLDER_ID
    metadata = {
        "name": folder_name,
        "mimeType": "application/vnd.google-apps.folder",
    }
    if parent:
        metadata["parents"] = [parent]

    created = _drive(creds).files().create(body=metadata, fields="id").execute()
    return created["id"]
269
+
270
+
271
def upload_file_to_drive(
    filepath: str,
    creds: Credentials = None,
    folder_id: str | None = None,
    make_public: bool = True,
) -> dict:
    """Upload a local file to Drive under its base name.

    Args:
        filepath: Path of the local file to upload.
        creds: Credentials to use; obtained from ``require_credentials()``
            when ``None``.
        folder_id: Parent folder on Drive; no parent is set when empty.
        make_public: When true, grant ``anyone`` read access after upload.

    Returns:
        The file's Drive metadata plus a ``direct_download_link`` entry.
    """
    if creds is None:
        creds = require_credentials()
    svc = _drive(creds)
    meta = {"name": os.path.basename(filepath)}
    if folder_id:
        meta["parents"] = [folder_id]

    media = MediaFileUpload(filepath, resumable=True)
    try:
        file = svc.files().create(
            body=meta, media_body=media,
            fields="id,name,webViewLink,webContentLink,mimeType,size",
        ).execute()
    finally:
        # MediaFileUpload keeps the source file open; release the handle once
        # the upload has finished or failed, as create_file_on_drive does.
        media._fd.close()

    file_id = file["id"]
    if make_public:
        file = _make_public(svc, file_id)
    file["direct_download_link"] = _direct_link(file_id)
    return file
293
+
294
+
295
def create_file_on_drive(
    filename: str,
    content: str,
    mimetype: str = "text/plain",
    creds: Credentials = None,
    folder_id: str | None = None,
    make_public: bool = True,
) -> dict:
    """Create a text file on Drive from an in-memory string.

    The content is written to a temporary file, uploaded, and the temporary
    file is removed afterwards whether or not the upload succeeded.

    Args:
        filename: Name of the file on Drive; its suffix (``.txt`` if it has
            none) is reused for the temporary file.
        content: Text to store, written as UTF-8.
        mimetype: MIME type passed to the upload.
        creds: Credentials to use; obtained from ``require_credentials()``
            when ``None``.
        folder_id: Parent folder on Drive; no parent is set when empty.
        make_public: When true, grant ``anyone`` read access after upload.

    Returns:
        The file's Drive metadata plus a ``direct_download_link`` entry.
    """
    if creds is None:
        creds = require_credentials()
    svc = _drive(creds)
    meta = {"name": filename}
    if folder_id:
        meta["parents"] = [folder_id]

    suffix = Path(filename).suffix or ".txt"
    tmp = tempfile.NamedTemporaryFile(
        mode="w", suffix=suffix, delete=False, encoding="utf-8"
    )
    tmp_path = tmp.name

    media = None
    try:
        # Closing the writer (via the with-block) flushes the content to disk
        # before the upload opens the file for reading.
        with tmp:
            tmp.write(content)
        media = MediaFileUpload(tmp_path, mimetype=mimetype, resumable=True)
        file = svc.files().create(
            body=meta, media_body=media,
            fields="id,name,webViewLink,webContentLink,mimeType,size",
        ).execute()
    finally:
        # Close the upload handle on failure as well as success; an open
        # handle keeps the temp file from being deleted on Windows.
        if media is not None:
            media._fd.close()
        try:
            os.unlink(tmp_path)
        except OSError:
            # Best-effort cleanup: a leftover temp file must not mask the
            # upload result or the original error.
            pass

    file_id = file["id"]
    if make_public:
        file = _make_public(svc, file_id)
    file["direct_download_link"] = _direct_link(file_id)
    return file
336
+
337
+
338
def get_drive_file(file_id: str, creds: Credentials = None) -> dict:
    """Fetch a Drive file's metadata and add a ``direct_download_link`` entry.

    ``creds`` is obtained from ``require_credentials()`` when ``None``.
    """
    if creds is None:
        creds = require_credentials()
    wanted_fields = "id,name,webViewLink,webContentLink,mimeType,size,createdTime,modifiedTime"
    lookup = _drive(creds).files().get(fileId=file_id, fields=wanted_fields)
    metadata = lookup.execute()
    metadata["direct_download_link"] = _direct_link(file_id)
    return metadata
347
+
348
+
349
def list_drive_files(
    folder_id: str | None = None,
    page_size: int = 20,
    creds: Credentials = None,
) -> list:
    """List non-trashed files in a Drive folder.

    Args:
        folder_id: Folder to list; falls back to ``DEFAULT_DRIVE_FOLDER_ID``
            when empty. If both are empty, all non-trashed files visible to
            the credentials are listed.
        page_size: Value passed to the API as ``pageSize``.
        creds: Credentials to use; obtained from ``require_credentials()``
            when ``None``.

    Returns:
        File metadata dicts, each with an added ``direct_download_link``.
    """
    if creds is None:
        creds = require_credentials()
    fid = folder_id or DEFAULT_DRIVE_FOLDER_ID
    if fid:
        # The folder ID can come straight from a request parameter. Escape
        # backslashes and single quotes so it cannot close the quoted literal
        # and change the query.
        safe_fid = fid.replace("\\", "\\\\").replace("'", "\\'")
        query = f"'{safe_fid}' in parents and trashed=false"
    else:
        query = "trashed=false"
    files = _drive(creds).files().list(
        q=query, pageSize=page_size,
        fields="files(id,name,webViewLink,webContentLink,mimeType,size,createdTime)",
    ).execute().get("files", [])
    for f in files:
        f["direct_download_link"] = _direct_link(f["id"])
    return files
365
+
366
+
367
+ # ============================================================================
368
+ # SHEETS
369
+ # ============================================================================
370
+
371
def _sheets(creds: Credentials):
    """Build a Sheets v4 API service object authorized with ``creds``."""
    sheets_service = build("sheets", "v4", credentials=creds)
    return sheets_service
373
+
374
+
375
def read_sheet(
    spreadsheet_id: str,
    range_name: str = "Sheet1!A1:Z1000",
    creds: Credentials = None,
) -> list:
    """Read a range from a spreadsheet.

    Returns:
        The ``values`` list from the API response, or ``[]`` when the
        response has no ``values`` key.

    ``creds`` is obtained from ``require_credentials()`` when ``None``.
    """
    if creds is None:
        creds = require_credentials()
    values_api = _sheets(creds).spreadsheets().values()
    response = values_api.get(spreadsheetId=spreadsheet_id, range=range_name).execute()
    return response.get("values", [])
387
+
388
+
389
def write_sheet(
    spreadsheet_id: str,
    range_name: str,
    values: list[list],
    creds: Credentials = None,
) -> dict:
    """Overwrite a range of a spreadsheet with ``values``.

    Values are sent with ``valueInputOption="USER_ENTERED"``. Returns the
    API response of the update call. ``creds`` is obtained from
    ``require_credentials()`` when ``None``.
    """
    if creds is None:
        creds = require_credentials()
    values_api = _sheets(creds).spreadsheets().values()
    request = values_api.update(
        spreadsheetId=spreadsheet_id,
        range=range_name,
        valueInputOption="USER_ENTERED",
        body={"values": values},
    )
    return request.execute()
406
+
407
+
408
def append_sheet(
    spreadsheet_id: str,
    range_name: str,
    values: list[list],
    creds: Credentials = None,
) -> dict:
    """Append rows to a spreadsheet.

    Values are sent with ``valueInputOption="USER_ENTERED"`` and
    ``insertDataOption="INSERT_ROWS"``. Returns the API response of the
    append call. ``creds`` is obtained from ``require_credentials()`` when
    ``None``.
    """
    if creds is None:
        creds = require_credentials()
    values_api = _sheets(creds).spreadsheets().values()
    request = values_api.append(
        spreadsheetId=spreadsheet_id,
        range=range_name,
        valueInputOption="USER_ENTERED",
        insertDataOption="INSERT_ROWS",
        body={"values": values},
    )
    return request.execute()
426
+
427
+
428
def clear_sheet(
    spreadsheet_id: str,
    range_name: str,
    creds: Credentials = None,
) -> dict:
    """Clear the values in a spreadsheet range and return the API response.

    ``creds`` is obtained from ``require_credentials()`` when ``None``.
    """
    if creds is None:
        creds = require_credentials()
    values_api = _sheets(creds).spreadsheets().values()
    request = values_api.clear(spreadsheetId=spreadsheet_id, range=range_name)
    return request.execute()
440
+
441
+
442
def get_sheet_metadata(
    spreadsheet_id: str,
    creds: Credentials = None,
) -> dict:
    """Return a summary of a spreadsheet and its tabs.

    Returns:
        A dict with ``spreadsheet_id``, ``title``, ``url`` and ``sheets``;
        each ``sheets`` entry has ``sheet_id``, ``title``, ``rows`` and ``cols``.

    ``creds`` is obtained from ``require_credentials()`` when ``None``.
    """
    if creds is None:
        creds = require_credentials()
    info = _sheets(creds).spreadsheets().get(spreadsheetId=spreadsheet_id).execute()

    tabs = []
    for tab in info.get("sheets", []):
        props = tab["properties"]
        grid = props["gridProperties"]
        tabs.append(
            {
                "sheet_id": props["sheetId"],
                "title": props["title"],
                "rows": grid["rowCount"],
                "cols": grid["columnCount"],
            }
        )

    sheet_id = info["spreadsheetId"]
    return {
        "spreadsheet_id": sheet_id,
        "title": info["properties"]["title"],
        "url": f"https://docs.google.com/spreadsheets/d/{sheet_id}",
        "sheets": tabs,
    }
463
+
464
+
465
def append_row_to_sheet(
    values: list,
    spreadsheet_id: str = DEFAULT_SPREADSHEET_ID,
    range_name: str = "Sheet1!A1",
    creds: Credentials = None,
):
    """Append a single row to a spreadsheet.

    Returns:
        The ``append_sheet`` response, or ``None`` without calling the API
        when ``spreadsheet_id`` is empty.
    """
    if spreadsheet_id:
        single_row = [values]
        return append_sheet(spreadsheet_id, range_name, single_row, creds=creds)
    return None
474
+
475
+
476
+ # ============================================================================
477
+ # SHEETS — JOB RECORD HELPERS
478
+ # ============================================================================
479
+
480
def ensure_sheet_header(creds: Credentials = None) -> None:
    """Write ``SHEETS_HEADERS`` to row 1 of the default sheet if it is empty.

    Does nothing when ``DEFAULT_SPREADSHEET_ID`` is empty. Any error from the
    read or the write is reported with a ``[WARN]`` message and not raised.
    """
    if not DEFAULT_SPREADSHEET_ID:
        return
    try:
        first_row = read_sheet(DEFAULT_SPREADSHEET_ID, "Sheet1!A1:Z1", creds=creds)
        if first_row:
            return
        write_sheet(DEFAULT_SPREADSHEET_ID, "Sheet1!A1", [SHEETS_HEADERS], creds=creds)
    except Exception as exc:
        print(f"[WARN] Could not write sheet header: {exc}")
494
+
495
+
496
def _find_job_row(job_id: str, creds: Credentials) -> int | None:
    """Find the 1-based row number of ``job_id`` in column B of the job sheet.

    Returns:
        The row number of the first matching row, or ``None`` when there is
        no match, no default spreadsheet is configured, or the sheet could
        not be read (the read error is reported with a ``[WARN]`` message).
    """
    if not DEFAULT_SPREADSHEET_ID:
        return None
    try:
        rows = read_sheet(DEFAULT_SPREADSHEET_ID, "Sheet1!B:B", creds=creds)
    except Exception as exc:
        # Stay best-effort like the other sheet helpers, but leave a trace so
        # an API failure can be told apart from "job not in sheet".
        print(f"[WARN] Could not look up sheet row for job {job_id}: {exc}")
        return None
    for i, row in enumerate(rows, start=1):
        if row and row[0] == job_id:
            return i
    return None
506
+
507
+
508
def _create_sheet_record(
    job_id: str,
    timestamp: str,
    youtube_url: str,
    email_to: str,
    creds: Credentials,
) -> None:
    """Append the initial row for a job to the job sheet.

    The row has status ``"initiated"`` and empty cells for every column that
    is filled in later. Does nothing when ``DEFAULT_SPREADSHEET_ID`` is
    empty. Errors are reported with a ``[WARN]`` message and not raised.
    """
    # Same guard as ensure_sheet_header and _update_sheet_record: without a
    # configured sheet there is nothing to log to.
    if not DEFAULT_SPREADSHEET_ID:
        return
    try:
        row = [
            timestamp,    # A — Timestamp
            job_id,       # B — Job ID
            "",           # C — Video Title
            youtube_url,  # D — YouTube URL
            "",           # E — Model Used
            "initiated",  # F — Status
            "",           # G — Summary Link
            "",           # H — Q&A Link
            "",           # I — Transcript Link
            email_to,     # J — Email Sent To
            "",           # K — Email Status
            "",           # L — Email Message ID
            "",           # M — Completed At
            "",           # N — Error
        ]
        append_sheet(DEFAULT_SPREADSHEET_ID, "Sheet1!A1", [row], creds=creds)
    except Exception as exc:
        print(f"[WARN] Could not create sheet record: {exc}")
536
+
537
+
538
def _update_sheet_record(
    job_id: str,
    creds: Credentials,
    video_title: str = "",
    model_used: str = "",
    status: str = "",
    summary_link: str = "",
    qa_link: str = "",
    transcript_link: str = "",
    email_status: str = "",
    email_msg_id: str = "",
    completed_at: str = "",
    error: str = "",
) -> None:
    """Update the sheet row of a job, keeping cells that are not being changed.

    The row is located by job ID, read, merged with the arguments and written
    back over columns A–N. An argument left as ``""`` keeps the cell's current
    value. Columns A, D and J are always kept as they are, and column B is
    always set to ``job_id``.

    Does nothing when ``DEFAULT_SPREADSHEET_ID`` is empty or the job's row is
    not found. Errors are reported with a ``[WARN]`` message and not raised.
    """
    if not DEFAULT_SPREADSHEET_ID:
        return
    try:
        row_num = _find_job_row(job_id, creds)
        if row_num is None:
            print(f"[WARN] Row for job {job_id} not found in sheet.")
            return

        cell_range = f"Sheet1!A{row_num}:N{row_num}"
        fetched = read_sheet(DEFAULT_SPREADSHEET_ID, cell_range, creds=creds)
        current = fetched[0] if fetched else [""] * 14

        def _merged(idx: int, new: str = "") -> str:
            # A non-empty new value wins; otherwise keep the existing cell,
            # or "" when the fetched row is shorter than this column.
            if new != "":
                return new
            return current[idx] if idx < len(current) else ""

        # Column index -> new value. Indexes 0, 3 and 9 are absent on purpose,
        # so those cells always keep their existing content.
        incoming = {
            2: video_title,
            4: model_used,
            5: status,
            6: summary_link,
            7: qa_link,
            8: transcript_link,
            10: email_status,
            11: email_msg_id,
            12: completed_at,
            13: error,
        }
        updated_row = [_merged(col, incoming.get(col, "")) for col in range(14)]
        updated_row[1] = job_id

        write_sheet(DEFAULT_SPREADSHEET_ID, cell_range, [updated_row], creds=creds)
    except Exception as exc:
        print(f"[WARN] Could not update sheet record: {exc}")
599
+
600
+
601
+ # ============================================================================
602
+ # STARTUP
603
+ # ============================================================================
604
+
605
@app.on_event("startup")
def on_startup():
    """On application startup, make sure the job sheet has its header row.

    Skipped when no usable credentials are stored yet.
    """
    creds = load_credentials()
    if not creds:
        return
    ensure_sheet_header(creds=creds)
610
+
611
+
612
+ # ============================================================================
613
+ # BASIC ROUTES
614
+ # ============================================================================
615
+
616
@app.get("/")
def root():
    """Serve the single-page UI (``index.html`` next to this module)."""
    # Resolve against BASE_DIR rather than the working directory so the page
    # is found regardless of where the server process was started from.
    return FileResponse(BASE_DIR / "index.html")
619
+
620
+
621
@app.get("/health")
def health():
    # Reports service status, whether usable Google credentials are stored,
    # and a static index of the API's endpoints grouped by area.
    creds = load_credentials()
    endpoint_index = {
        "auth": ["/auth/start", "/auth/status", "/auth/revoke"],
        "gmail": ["/email"],
        "drive": ["/drive/create", "/drive/file/{id}", "/drive/files"],
        "sheets": [
            "/sheets/info",
            "/sheets/read",
            "/sheets/write",
            "/sheets/append",
            "/sheets/clear",
        ],
        "jobs": ["/generate", "/status/{job_id}", "/jobs"],
        "misc": ["/health"],
    }
    is_authenticated = creds is not None
    return {
        "status": "ok",
        "version": "7.0.0",
        "authenticated": is_authenticated,
        "endpoints": endpoint_index,
    }
638
+
639
+
640
+ # ============================================================================
641
+ # OAUTH
642
+ # ============================================================================
643
+
644
@app.get("/auth/start")
def auth_start():
    # Starts the OAuth flow: generates a PKCE verifier/challenge pair, stores
    # the verifier under the flow's state value for /auth/callback, and
    # redirects the browser to the authorization URL.
    oauth_flow = create_flow()

    code_verifier = secrets.token_urlsafe(64)
    # S256 challenge: URL-safe base64 of SHA-256(verifier) with "=" padding removed.
    digest = hashlib.sha256(code_verifier.encode()).digest()
    code_challenge = base64.urlsafe_b64encode(digest).rstrip(b"=").decode()

    url_options = {
        "access_type": "offline",
        "include_granted_scopes": "true",
        "prompt": "consent",
        "code_challenge": code_challenge,
        "code_challenge_method": "S256",
    }
    redirect_target, state_token = oauth_flow.authorization_url(**url_options)

    pending = load_states()
    pending[state_token] = code_verifier
    save_states(pending)
    return RedirectResponse(redirect_target)
663
+
664
+
665
@app.get("/auth/callback")
def auth_callback(request: Request):
    """Finish the OAuth flow started by ``/auth/start``.

    Checks the ``state`` value against the saved pending states, exchanges
    the authorization code for tokens using the matching PKCE verifier, and
    writes the credentials to ``TOKEN_PATH``.

    Raises:
        HTTPException: status 400 when the state is unknown, or when the
            callback carries an ``error`` parameter (for example the user
            denied consent).
    """
    params = request.query_params
    state = params.get("state", "")
    states = load_states()
    if state not in states:
        raise HTTPException(status_code=400, detail="Invalid or expired OAuth state.")
    # A state is single-use: remove it before doing anything else.
    verifier = states.pop(state)
    save_states(states)

    # An error callback has no code to exchange. Answer with a clear 400
    # instead of letting fetch_token fail with a server error.
    oauth_error = params.get("error")
    if oauth_error:
        raise HTTPException(
            status_code=400,
            detail=f"Authorization was not granted: {oauth_error}",
        )

    # Rewrite only the URL scheme; other "https://" occurrences inside the
    # query string must reach the token exchange unchanged.
    callback_url = str(request.url)
    if callback_url.startswith("https://"):
        callback_url = "http://" + callback_url[len("https://"):]

    flow = create_flow()
    flow.fetch_token(
        authorization_response=callback_url,
        code_verifier=verifier,
    )
    Path(TOKEN_PATH).write_text(flow.credentials.to_json(), encoding="utf-8")
    return HTMLResponse("""
    <html>
    <body style="font-family:sans-serif;text-align:center;padding-top:80px;
    background:#f0fdf4;color:#166534">
    <h1>✅ Authorization Successful</h1>
    <p>Gmail, Drive and Sheets are now connected.</p>
    <p>You can close this tab.</p>
    </body>
    </html>
    """)
689
+
690
+
691
@app.get("/auth/status")
def auth_status():
    # Reports whether usable Google credentials are currently stored.
    is_authenticated = load_credentials() is not None
    return {"authenticated": is_authenticated}
695
+
696
+
697
@app.delete("/auth/revoke")
def auth_revoke():
    """Delete the locally stored token file and the pending OAuth states file.

    Only local files are removed; this handler makes no call to Google.
    """
    for p in [Path(TOKEN_PATH), STATE_FILE]:
        # missing_ok avoids the check-then-delete race when two revoke
        # requests (or a revoke and another writer) overlap.
        p.unlink(missing_ok=True)
    return {"status": "revoked"}
703
+
704
+
705
+ # ============================================================================
706
+ # EMAIL
707
+ # ============================================================================
708
+
709
@app.post("/email")
def email(payload: EmailRequest):
    # Sends one plain-text message through Gmail and returns the message ID
    # from the API response. Requires stored credentials (401 otherwise).
    credentials = require_credentials()
    sent = send_email(payload.to, payload.subject, payload.body, creds=credentials)
    message_id = sent.get("id")
    return {"status": "sent", "message_id": message_id}
714
+
715
+
716
+ # ============================================================================
717
+ # DRIVE ROUTES
718
+ # ============================================================================
719
+
720
@app.post("/drive/create")
def drive_create(payload: CreateFileRequest):
    # Creates a file on Drive from the posted text and returns its ID, name,
    # MIME type and links. Requires stored credentials (401 otherwise).
    credentials = require_credentials()
    created = create_file_on_drive(
        filename=payload.filename,
        content=payload.content,
        mimetype=payload.mimetype,
        creds=credentials,
        folder_id=payload.folder_id,
        make_public=payload.make_public,
    )
    response = {"file_id": created["id"], "name": created["name"]}
    response["mime_type"] = created.get("mimeType")
    response["web_view_link"] = created.get("webViewLink")
    response["direct_download_link"] = created["direct_download_link"]
    return response
738
+
739
+
740
@app.get("/drive/file/{file_id}")
def drive_get_file(file_id: str):
    """Look up one Drive file and return its metadata.

    Args:
        file_id: Identifier passed straight to ``get_drive_file``.

    Returns:
        A dict with id, name, MIME type, size, created/modified timestamps,
        web view link and direct download link.
    """
    credentials = require_credentials()
    found = get_drive_file(file_id, creds=credentials)

    # Output key -> key in the helper's result, for the optional fields.
    optional_fields = (
        ("mime_type", "mimeType"),
        ("size_bytes", "size"),
        ("created", "createdTime"),
        ("modified", "modifiedTime"),
        ("web_view_link", "webViewLink"),
    )
    response = {"file_id": found["id"], "name": found["name"]}
    for out_key, source_key in optional_fields:
        response[out_key] = found.get(source_key)
    response["direct_download_link"] = found["direct_download_link"]
    return response
754
+
755
+
756
@app.get("/drive/files")
def drive_list_files(folder_id: str = "", limit: int = 20):
    """List Drive files, optionally restricted to one folder.

    Args:
        folder_id: Folder to list; an empty string means "no folder filter"
            and is passed to the helper as ``None``.
        limit: Page size handed to ``list_drive_files``. Must be 1..1000.
            NOTE(review): the upper bound assumes the helper forwards this
            value as the Drive ``pageSize`` parameter -- confirm.

    Returns:
        ``{"count": <number of files>, "files": [...]}``.

    Raises:
        HTTPException: 400 when ``limit`` is outside 1..1000.
    """
    # Reject nonsensical page sizes here with a clear 400 instead of letting
    # them surface as an opaque upstream API error.
    if not 1 <= limit <= 1000:
        raise HTTPException(
            status_code=400,
            detail="limit must be between 1 and 1000.",
        )

    creds = require_credentials()
    files = list_drive_files(
        folder_id=folder_id or None,
        page_size=limit,
        creds=creds,
    )
    return {"count": len(files), "files": files}
765
+
766
+
767
+ # ============================================================================
768
+ # SHEETS ROUTES
769
+ # ============================================================================
770
+
771
@app.get("/sheets/info")
def sheets_info(spreadsheet_id: str = DEFAULT_SPREADSHEET_ID):
    """Return the metadata produced by ``get_sheet_metadata`` for a spreadsheet.

    Raises:
        HTTPException: 400 when ``spreadsheet_id`` is empty.
    """
    if spreadsheet_id:
        return get_sheet_metadata(spreadsheet_id, creds=require_credentials())
    raise HTTPException(status_code=400, detail="spreadsheet_id is required.")
776
+
777
+
778
@app.get("/sheets/read")
def sheets_read(
    spreadsheet_id: str = DEFAULT_SPREADSHEET_ID,
    range_name: str = "Sheet1!A1:Z1000",
):
    """Read a range of cells from a spreadsheet.

    Returns:
        A dict echoing the spreadsheet id and range, plus the row count and
        the rows returned by ``read_sheet``.

    Raises:
        HTTPException: 400 when ``spreadsheet_id`` is empty.
    """
    missing_id = not spreadsheet_id
    if missing_id:
        raise HTTPException(status_code=400, detail="spreadsheet_id is required.")

    credentials = require_credentials()
    values = read_sheet(spreadsheet_id, range_name, creds=credentials)
    return dict(
        spreadsheet_id=spreadsheet_id,
        range=range_name,
        row_count=len(values),
        values=values,
    )
792
+
793
+
794
@app.post("/sheets/write")
def sheets_write(payload: SheetWriteRequest):
    """Write ``payload.values`` into ``payload.range_name`` of a spreadsheet.

    Returns:
        ``{"status": "written", ...}`` with the updated range / rows /
        columns / cells counters copied from the ``write_sheet`` result.

    Raises:
        HTTPException: 400 when ``payload.spreadsheet_id`` is empty.
    """
    if not payload.spreadsheet_id:
        raise HTTPException(status_code=400, detail="spreadsheet_id is required.")

    outcome = write_sheet(
        payload.spreadsheet_id,
        payload.range_name,
        payload.values,
        creds=require_credentials(),
    )

    # Response key -> key in the write_sheet result.
    counters = (
        ("updated_range", "updatedRange"),
        ("updated_rows", "updatedRows"),
        ("updated_columns", "updatedColumns"),
        ("updated_cells", "updatedCells"),
    )
    response = {"status": "written"}
    for out_key, source_key in counters:
        response[out_key] = outcome.get(source_key)
    return response
809
+
810
+
811
@app.post("/sheets/append")
def sheets_append(payload: SheetWriteRequest):
    """Append ``payload.values`` after ``payload.range_name`` in a spreadsheet.

    Returns:
        ``{"status": "appended", ...}`` with counters read from the
        ``"updates"`` sub-dict of the ``append_sheet`` result (all ``None``
        when that sub-dict is missing).

    Raises:
        HTTPException: 400 when ``payload.spreadsheet_id`` is empty.
    """
    if not payload.spreadsheet_id:
        raise HTTPException(status_code=400, detail="spreadsheet_id is required.")

    outcome = append_sheet(
        payload.spreadsheet_id,
        payload.range_name,
        payload.values,
        creds=require_credentials(),
    )
    applied = outcome.get("updates", {})

    response = {"status": "appended"}
    response["updated_range"] = applied.get("updatedRange")
    response["updated_rows"] = applied.get("updatedRows")
    response["updated_cells"] = applied.get("updatedCells")
    return response
826
+
827
+
828
@app.post("/sheets/clear")
def sheets_clear(payload: SheetClearRequest):
    """Clear the cells in ``payload.range_name`` of a spreadsheet.

    Returns:
        ``{"status": "cleared", "cleared_range": ..., "spreadsheet_id": ...}``
        with the last two values copied from the ``clear_sheet`` result.

    Raises:
        HTTPException: 400 when ``payload.spreadsheet_id`` is empty.
    """
    if not payload.spreadsheet_id:
        raise HTTPException(status_code=400, detail="spreadsheet_id is required.")

    credentials = require_credentials()
    outcome = clear_sheet(
        payload.spreadsheet_id,
        payload.range_name,
        creds=credentials,
    )
    cleared_range = outcome.get("clearedRange")
    sheet_id = outcome.get("spreadsheetId")
    return {
        "status": "cleared",
        "cleared_range": cleared_range,
        "spreadsheet_id": sheet_id,
    }
841
+
842
+
843
+ # ============================================================================
844
+ # JOB STATUS ROUTES
845
+ # ============================================================================
846
+
847
@app.get("/status/{job_id}")
def get_status(job_id: str):
    """Return the current record of one pipeline job.

    Args:
        job_id: Identifier returned by ``POST /generate``.

    Returns:
        A snapshot (deep copy) of the job record.

    Raises:
        HTTPException: 404 when no job with that id exists.
    """
    import copy

    with _jobs_lock:
        job = _jobs.get(job_id)
        # Copy while still holding the lock: the pipeline runs in another
        # thread, and returning the live dict would let it change while the
        # response is being serialized.
        snapshot = copy.deepcopy(job) if job is not None else None

    if snapshot is None:
        raise HTTPException(status_code=404, detail=f"Job '{job_id}' not found.")
    return snapshot
854
+
855
+
856
@app.get("/jobs")
def list_jobs():
    """List every known job with a short summary of each.

    Returns:
        ``{"total": <job count>, "jobs": [...]}`` where each entry carries the
        job id, status, YouTube URL and start/completion timestamps.
    """
    summary_fields = (
        "job_id",
        "status",
        "youtube_url",
        "started_at",
        "completed_at",
    )
    # Build the whole response while holding the lock so the count and the
    # summaries describe the same set of jobs.
    with _jobs_lock:
        total = len(_jobs)
        summaries = [
            {field: record[field] for field in summary_fields}
            for record in _jobs.values()
        ]
        return {"total": total, "jobs": summaries}
872
+
873
+
874
+ # ============================================================================
875
+ # PIPELINE BACKGROUND WORKER
876
+ # ============================================================================
877
+
878
def _upload_with_title(
    local_file: Path,
    drive_name: str,
    step_key: str,
    job_id: str,
    folder_id: str,
    creds: Credentials,
) -> dict:
    """Upload ``local_file`` to Drive under the file name ``drive_name``.

    The file is copied to a renamed staging path and that path is handed to
    ``upload_file_to_drive``. The step identified by ``step_key`` is marked
    "running", then "done" or "failed".

    Args:
        local_file: Existing file to upload.
        drive_name: File name the staged copy (and so the upload) should carry.
        step_key: Job step to update via ``_set_step``.
        job_id: Job whose step status is updated.
        folder_id: Destination Drive folder.
        creds: Credentials passed to the upload helper.

    Returns:
        The ``upload_file_to_drive`` result on success, or
        ``{"error": "<message>"}`` on any failure (errors are not raised).
    """
    import tempfile

    _set_step(job_id, step_key, "running")
    try:
        # Stage the renamed copy in a private temporary directory rather than
        # next to the source file. This (1) guarantees cleanup even when the
        # upload raises, (2) never touches the source when drive_name equals
        # its own name, and (3) keeps concurrent jobs with the same title from
        # overwriting each other's copy.
        with tempfile.TemporaryDirectory(prefix="drive_upload_") as staging_dir:
            staged_path = Path(staging_dir) / drive_name
            shutil.copy2(local_file, staged_path)
            result = upload_file_to_drive(
                str(staged_path), creds=creds,
                folder_id=folder_id, make_public=True,
            )
        _set_step(job_id, step_key, "done")
        return result
    except Exception as exc:
        # Deliberately best-effort: the caller inspects the returned dict and
        # continues the pipeline without this file's links.
        _set_step(job_id, step_key, "failed")
        return {"error": str(exc)}
903
+
904
+
905
def _run_pipeline(job_id: str, youtube_url: str, email_to: str):
    """Background worker that runs the whole pipeline for one job.

    Steps: fetch transcript -> summarize -> create Drive folder -> upload the
    three output files -> send the result email -> write the final sheet row.
    Progress is recorded through ``_update_job`` / ``_set_step`` and mirrored
    to the tracking sheet through ``_update_sheet_record``.

    Nothing is returned and nothing is raised: any failure is recorded on the
    job (``status="failed"`` plus ``error``) so pollers can see it.

    Args:
        job_id: Identifier of the job record to update.
        youtube_url: Video to process.
        email_to: Recipient of the result email.
    """
    creds = load_credentials()
    timestamp = datetime.now(timezone.utc).strftime("%Y-%m-%d %H:%M:%S UTC")

    try:
        # Create the initial sheet row immediately. This is inside the try so
        # that a failure here marks the job as failed instead of killing the
        # thread and leaving the job in its initial state forever.
        _create_sheet_record(
            job_id=job_id,
            timestamp=timestamp,
            youtube_url=youtube_url,
            email_to=email_to,
            creds=creds,
        )

        # -- STEP 1: Fetch transcript ------------------------------------
        _update_job(job_id, status="fetching_transcript")
        _set_step(job_id, "fetch_transcript", "running")
        _update_sheet_record(job_id, creds, status="fetching_transcript")

        try:
            pipeline = TranscriptSummaryPipeline(
                youtube_url=youtube_url,
                languages=["en", "en-US", "en-GB"],
            )
            transcript = pipeline.fetcher.run()
            _set_step(job_id, "fetch_transcript", "done")
            _update_sheet_record(
                job_id, creds,
                video_title=pipeline.video_title,
                status="transcript_ready",
            )
        except Exception as exc:
            _set_step(job_id, "fetch_transcript", "failed")
            # "from exc" keeps the original traceback attached to the wrapper.
            raise RuntimeError(f"Transcript fetch failed: {exc}") from exc

        video_title = pipeline.video_title

        # -- STEP 2: Summarize -------------------------------------------
        _update_job(job_id, status="summarizing")
        _set_step(job_id, "summarize", "running")
        _update_sheet_record(job_id, creds, status="summarizing")

        try:
            summary, qa, model_used = pipeline.summarizer.run(transcript)
            _set_step(job_id, "summarize", "done")
            _update_sheet_record(
                job_id, creds,
                model_used=model_used,
                status="summarized",
            )
        except Exception as exc:
            _set_step(job_id, "summarize", "failed")
            raise RuntimeError(f"Summarization failed: {exc}") from exc

        # -- STEP 3: Create Drive folder ---------------------------------
        _update_job(job_id, status="creating_drive_folder")
        _set_step(job_id, "create_drive_folder", "running")
        _update_sheet_record(job_id, creds, status="creating_drive_folder")

        try:
            folder_id = create_drive_folder(video_title, creds=creds)
            _set_step(job_id, "create_drive_folder", "done")
        except Exception as exc:
            _set_step(job_id, "create_drive_folder", "failed")
            raise RuntimeError(f"Drive folder creation failed: {exc}") from exc

        # -- STEP 4-6: Upload files --------------------------------------
        _update_job(job_id, status="uploading_drive")
        _update_sheet_record(job_id, creds, status="uploading_to_drive")

        # Each upload is best-effort: on failure the helper returns
        # {"error": ...} and the corresponding link below becomes "N/A".
        summary_drive = _upload_with_title(
            SUMMARY_FILE, f"{video_title}__summary.txt", "upload_summary",
            job_id, folder_id, creds,
        )
        qa_drive = _upload_with_title(
            QA_FILE, f"{video_title}__qa.txt", "upload_qa",
            job_id, folder_id, creds,
        )
        transcript_drive = _upload_with_title(
            TRANSCRIPT_FILE, f"{video_title}__transcript.txt", "upload_transcript",
            job_id, folder_id, creds,
        )

        summary_link = summary_drive.get("direct_download_link", "N/A")
        qa_link = qa_drive.get("direct_download_link", "N/A")
        transcript_link = transcript_drive.get("direct_download_link", "N/A")

        _update_sheet_record(
            job_id, creds,
            status="drive_uploaded",
            summary_link=summary_link,
            qa_link=qa_link,
            transcript_link=transcript_link,
        )

        # -- STEP 7: Send email ------------------------------------------
        _update_job(job_id, status="sending_email")
        _set_step(job_id, "send_email", "running")
        _update_sheet_record(job_id, creds, status="sending_email")

        email_subject = f"✅ YouTube Summary Ready — {video_title}"
        email_body = f"""Hello,

Your YouTube video has been processed successfully.

🎥 Title : {video_title}
🔗 Video URL : {youtube_url}

📄 Summary : {summary_link}
❓ Q&A : {qa_link}
📝 Transcript : {transcript_link}

────────────────────────────────
Model Used : {model_used}
────────────────────────────────

Regards,
Google Integration API
"""
        # A failed email does not fail the job; the outcome is reported in
        # the job result and the sheet instead.
        try:
            email_result = send_email(
                to=email_to, subject=email_subject,
                body=email_body, creds=creds,
            )
            email_status = "sent"
            email_msg_id = email_result.get("id", "")
            _set_step(job_id, "send_email", "done")
        except Exception as exc:
            email_status = f"failed: {exc}"
            email_msg_id = ""
            _set_step(job_id, "send_email", "failed")

        # -- STEP 8: Final sheet update ----------------------------------
        _update_job(job_id, status="logging_sheet")
        _set_step(job_id, "log_sheet", "running")

        completed_at = datetime.now(timezone.utc).strftime("%Y-%m-%d %H:%M:%S UTC")

        try:
            _update_sheet_record(
                job_id, creds,
                status="completed",
                email_status=email_status,
                email_msg_id=email_msg_id,
                completed_at=completed_at,
            )
            _set_step(job_id, "log_sheet", "done")
            sheets_status = "logged"
        except Exception as exc:
            sheets_status = f"failed: {exc}"
            _set_step(job_id, "log_sheet", "failed")

        # -- COMPLETE ----------------------------------------------------
        _update_job(
            job_id,
            status="completed",
            completed_at=completed_at,
            result={
                "video_title": video_title,
                "youtube_url": youtube_url,
                "model_used": model_used,
                "drive": {
                    "folder_id": folder_id,
                    "summary": {
                        "web_view_link": summary_drive.get("webViewLink"),
                        "direct_download_link": summary_link,
                    },
                    "qa": {
                        "web_view_link": qa_drive.get("webViewLink"),
                        "direct_download_link": qa_link,
                    },
                    "transcript": {
                        "web_view_link": transcript_drive.get("webViewLink"),
                        "direct_download_link": transcript_link,
                    },
                },
                "email": {"status": email_status, "message_id": email_msg_id},
                "sheets": {"status": sheets_status},
            },
        )

    except Exception as exc:
        completed_at = datetime.now(timezone.utc).strftime("%Y-%m-%d %H:%M:%S UTC")
        # Record the failure on the job first: that is what pollers read.
        _update_job(
            job_id,
            status="failed",
            completed_at=completed_at,
            error=str(exc),
        )
        # Mirroring the failure to the sheet is best-effort. If the sheet is
        # what broke, note that on the job rather than letting a second
        # exception escape this thread unreported.
        try:
            _update_sheet_record(
                job_id, creds,
                status="failed",
                completed_at=completed_at,
                error=str(exc),
            )
        except Exception as sheet_exc:
            _update_job(
                job_id,
                error=f"{exc} (sheet update also failed: {sheet_exc})",
            )
1100
+
1101
+
1102
+ # ============================================================================
1103
+ # GENERATE ROUTE
1104
+ # ============================================================================
1105
+
1106
@app.post("/generate")
def generate(payload: GenerateRequest):
    """Start the full pipeline in a background thread.

    Returns immediately with the job id; poll ``GET /status/{job_id}`` for
    progress.

    Args:
        payload: Carries ``youtube_url`` and ``email_to``.

    Returns:
        A dict with ``job_id``, ``status`` ("initiated"), ``poll_url`` and the
        job's ``started_at`` value.
    """
    # Fail the request up front when no credentials are available, before any
    # job is registered.
    require_credentials()

    job_id = secrets.token_hex(8)
    _new_job(job_id, payload.youtube_url, payload.email_to)

    # Read the start time under the lock and before the worker exists, so the
    # shared job table is never read while another thread may be writing it.
    with _jobs_lock:
        started_at = _jobs[job_id]["started_at"]

    worker = threading.Thread(
        target=_run_pipeline,
        args=(job_id, payload.youtube_url, payload.email_to),
        daemon=True,
    )
    worker.start()

    return {
        "job_id": job_id,
        "status": "initiated",
        "poll_url": f"/status/{job_id}",
        "started_at": started_at,
    }
1130
+
1131
+
1132
+ # ============================================================================
1133
+ # RUN
1134
+ # ============================================================================
1135
+
1136
if __name__ == "__main__":
    # Direct-execution entry point: serve the app with uvicorn on all
    # interfaces, port 8000, with auto-reload switched on.
    import uvicorn

    server_options = {"host": "0.0.0.0", "port": 8000, "reload": True}
    uvicorn.run("fastapi_app:app", **server_options)
gemini_transcript.py ADDED
@@ -0,0 +1,445 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ #!/usr/bin/env python3
2
+
3
+ from __future__ import annotations
4
+
5
+ import re
6
+ import sys
7
+ import json
8
+ import logging
9
+ import time
10
+
11
+ from pathlib import Path
12
+ from typing import Optional, List
13
+ from urllib.parse import urlparse, parse_qs
14
+ # from google import genai
15
+
16
+ from youtube_transcript_api import (
17
+ YouTubeTranscriptApi,
18
+ TranscriptsDisabled,
19
+ NoTranscriptFound,
20
+ VideoUnavailable,
21
+ )
22
+
23
+
24
+ # ============================================================================
25
+ # CONFIG
26
+ # ============================================================================
27
+
28
import os  # needed only for the API-key lookup below

# Output files are written to an "output" directory next to this module; the
# directory is created on import if it does not exist yet.
BASE_DIR = Path(__file__).resolve().parent
OUTPUT_DIR = BASE_DIR / "output"
OUTPUT_DIR.mkdir(exist_ok=True)

TRANSCRIPT_FILE = OUTPUT_DIR / "transcript.txt"
SUMMARY_FILE = OUTPUT_DIR / "summary.txt"
QA_FILE = OUTPUT_DIR / "qa.txt"

# SECURITY: the API key must never be committed to source control. It is read
# from the GEMINI_API_KEY environment variable; an empty string means "not
# configured". The key that was previously hard-coded here has been published
# and should be revoked and replaced.
GEMINI_API_KEY = os.environ.get("GEMINI_API_KEY", "")

# Models in the order they are tried.
GEMINI_MODELS = [
    "gemini-2.5-flash",
    "gemini-2.5-flash-lite",
    "gemini-2.5-pro",
]

# Polling schedule for transcript fetching. "wait_before" is the number of
# seconds slept immediately before that attempt, so the waits add up:
# 0, 5m, 20m, 50m, 1h50m, 2h50m, 3h50m, 4h50m, 5h50m after the trigger.
POLLING_CONFIG = {
    "attempt_1": {"wait_before": 0, "description": "Immediate attempt on trigger"},
    "attempt_2": {"wait_before": 300, "description": "Retry after 5 minutes"},
    "attempt_3": {"wait_before": 900, "description": "Retry after 15 minutes"},
    "attempt_4": {"wait_before": 1800, "description": "Retry after 30 minutes"},
    "attempt_5": {"wait_before": 3600, "description": "Retry after 1 hour"},
    "attempt_6": {"wait_before": 3600, "description": "Retry after 1 hour (~2h50m total)"},
    "attempt_7": {"wait_before": 3600, "description": "Retry after 1 hour (~3h50m total)"},
    "attempt_8": {"wait_before": 3600, "description": "Retry after 1 hour (~4h50m total)"},
    "attempt_9": {"wait_before": 3600, "description": "Final attempt (~5h50m total)"},
}
55
+
56
+ SYSTEM_PROMPT = """
57
+ You are an expert content summarizer and educator.
58
+
59
+ Produce the full output containing exactly two parts separated by a line with only 5 exclamation marks:
60
+
61
+ !!!!!
62
+
63
+ --- PART 1: SUMMARY ---
64
+
65
+ Write a detailed, well-structured summary of the entire content.
66
+ Use the following structure:
67
+
68
+ ## Overview
69
+ A 3-5 sentence high-level overview of the entire content.
70
+
71
+ ## Key Topics Covered
72
+ List the main topics discussed, each with a brief explanation.
73
+
74
+ ## Detailed Summary
75
+ A thorough section-by-section breakdown of the content in the order it was presented.
76
+ Use subheadings for each major section or topic shift.
77
+
78
+ ## Key Takeaways
79
+ A bullet list of the most important insights, facts, or conclusions from the content.
80
+
81
+ ---
82
+
83
+ !!!!!
84
+
85
+ --- PART 2: Q&A ---
86
+
87
+ Generate a comprehensive Q&A section based on the content.
88
+ Format each entry exactly like this:
89
+
90
+ Q1: [First question]
91
+ Answer: [Detailed answer]
92
+
93
+ Q2: [Second question]
94
+ Answer: [Detailed answer]
95
+
96
+ Q3: [Third question]
97
+ Answer: [Detailed answer]
98
+
99
+ ... and so on until all important questions are covered.
100
+
101
+ Rules:
102
+ - Number every question and answer with matching numbers (Q1/A1, Q2/A2, etc.)
103
+ - Each answer must be detailed and self-contained
104
+ - Cover all major topics, concepts, facts, and insights from the content
105
+ - Minimum 10 Q&A pairs, more if the content is rich
106
+ - Do NOT use bullet points inside answers β€” write in full sentences
107
+
108
+ ---
109
+ """
110
+
111
+
112
+ # ============================================================================
113
+ # LOGGING
114
+ # ============================================================================
115
+
116
+ logging.basicConfig(
117
+ level=logging.INFO,
118
+ format="%(asctime)s | %(levelname)s | %(message)s",
119
+ )
120
+ logger = logging.getLogger("gemini_pipeline")
121
+
122
+
123
+ # ============================================================================
124
+ # HELPERS
125
+ # ============================================================================
126
+
127
+ def _format_duration(seconds: int) -> str:
128
+ if seconds < 60:
129
+ return f"{seconds}s"
130
+ if seconds < 3600:
131
+ return f"{seconds // 60}m"
132
+ h = seconds // 3600
133
+ m = (seconds % 3600) // 60
134
+ return f"{h}h {m}m" if m else f"{h}h"
135
+
136
+
137
def fetch_video_title(video_id: str) -> str:
    """Fetch a video's title via YouTube's oEmbed endpoint (no API key needed).

    The title is made safe for use in file names: the characters
    ``\\ / * ? : " < > |`` are removed, runs of whitespace become a single
    underscore, and the result is cut to 80 characters.

    Args:
        video_id: YouTube video identifier.

    Returns:
        The sanitized title, or ``video_id`` itself when the lookup fails or
        the sanitized title is empty. This function never raises.
    """
    import urllib.request
    from urllib.parse import urlencode

    # urlencode percent-encodes the nested watch URL; pasted in raw, its own
    # "?" and "=" would sit unescaped inside the outer query string.
    watch_url = f"https://www.youtube.com/watch?v={video_id}"
    oembed_url = "https://www.youtube.com/oembed?" + urlencode(
        {"url": watch_url, "format": "json"}
    )

    try:
        with urllib.request.urlopen(oembed_url, timeout=10) as resp:
            data = json.loads(resp.read().decode())
        title = data.get("title", "")
        safe = re.sub(r'[\\/*?:"<>|]', "", title)
        safe = re.sub(r"\s+", "_", safe.strip())
        return safe[:80] or video_id
    except Exception as exc:
        # Best-effort by design: the title only names output files, so fall
        # back to the video ID -- but leave a trace of why the lookup failed.
        logging.getLogger("gemini_pipeline").warning(
            "Could not fetch title for video %s, using the ID instead: %s",
            video_id, exc,
        )
        return video_id
153
+
154
+
155
+ # ============================================================================
156
+ # YOUTUBE TRANSCRIPT FETCHER
157
+ # ============================================================================
158
+
159
class YouTubeTranscriptFetcher:
    """Fetches a YouTube transcript, retrying on a polling schedule.

    New uploads often have no transcript yet, so ``run()`` walks through
    ``polling_config`` (attempt name -> ``{"wait_before", "description"}``),
    sleeping ``wait_before`` seconds before each attempt, until a transcript
    is fetched or the schedule is exhausted.
    """

    # Hosts whose URLs carry the video ID in the path or in the "v" parameter.
    _YOUTUBE_HOSTS = (
        "youtube.com",
        "www.youtube.com",
        "m.youtube.com",
        "music.youtube.com",
    )
    # First path segments that are followed directly by the video ID.
    _PATH_ID_PREFIXES = ("live", "shorts", "embed")

    def __init__(
        self,
        youtube_url: str,
        output_file: Optional[Path] = None,
        languages: Optional[List[str]] = None,
        polling_config: dict = None,
    ):
        """Prepare a fetcher for one video.

        Args:
            youtube_url: Any supported YouTube URL form.
            output_file: Where the transcript text is saved; defaults to
                ``TRANSCRIPT_FILE``.
            languages: Preferred transcript languages in priority order;
                defaults to English variants.
            polling_config: Retry schedule; defaults to ``POLLING_CONFIG``.

        Raises:
            ValueError: If no video ID can be extracted from ``youtube_url``.
        """
        self.youtube_url = youtube_url
        # Resolve the default at call time rather than at definition time.
        self.output_file = Path(TRANSCRIPT_FILE if output_file is None else output_file)
        self.languages = languages or ["en", "en-US", "en-GB"]
        self.polling_config = polling_config or POLLING_CONFIG
        self.video_id = self._extract_video_id(youtube_url)
        self.api = YouTubeTranscriptApi()

    @staticmethod
    def _extract_video_id(url: str) -> str:
        """Extract the video ID from a YouTube URL.

        Supports ``youtu.be/<id>``, ``/watch?v=<id>`` and
        ``/live|shorts|embed/<id>`` on the known YouTube hosts.

        Raises:
            ValueError: For non-YouTube hosts, or when the URL has a supported
                host but carries no (or an empty) video ID.
        """
        parsed = urlparse(url.strip())
        host = parsed.hostname or ""

        if host == "youtu.be":
            # First path segment only, so a trailing slash or extra segments
            # do not end up inside the ID.
            video_id = parsed.path.strip("/").split("/")[0]
        elif host in YouTubeTranscriptFetcher._YOUTUBE_HOSTS:
            path_parts = parsed.path.strip("/").split("/")
            if (
                path_parts[0] in YouTubeTranscriptFetcher._PATH_ID_PREFIXES
                and len(path_parts) >= 2
            ):
                video_id = path_parts[1]
            else:
                video_id = parse_qs(parsed.query).get("v", [""])[0]
        else:
            raise ValueError(f"Unsupported YouTube URL: {url}")

        # An empty ID (e.g. "https://youtu.be/") would otherwise only surface
        # later as a confusing failure inside the polling loop.
        if not video_id:
            raise ValueError(f"Could not extract video ID from URL: {url}")
        return video_id

    def _fetch_once(self) -> str:
        """Fetch the transcript once and return its text joined by spaces."""
        transcript = self.api.fetch(self.video_id, languages=self.languages)
        return " ".join(item.text for item in transcript)

    def _save(self, text: str) -> None:
        """Write the transcript to ``output_file``, creating parent folders."""
        self.output_file.parent.mkdir(parents=True, exist_ok=True)
        self.output_file.write_text(text, encoding="utf-8")

    def run(self) -> str:
        """Poll for the transcript according to ``polling_config``.

        Returns:
            The transcript text (also saved to ``output_file``).

        Raises:
            TranscriptsDisabled: Immediately, since retrying cannot help.
            RuntimeError: When every attempt has failed; the last underlying
                error is attached as the cause.
        """
        logger.info("Video ID : %s", self.video_id)
        logger.info("Output file : %s", self.output_file)
        logger.info("Total polling attempts: %d", len(self.polling_config))

        attempts = list(self.polling_config.items())
        last_error = None

        for idx, (attempt_key, config) in enumerate(attempts, start=1):
            wait_before = config["wait_before"]
            description = config["description"]

            if wait_before > 0:
                logger.info(
                    "[%d/%d] %s — waiting %s before retry...",
                    idx, len(attempts), description,
                    _format_duration(wait_before),
                )
                time.sleep(wait_before)

            logger.info(
                "[%d/%d] %s — fetching transcript now...",
                idx, len(attempts), description,
            )

            try:
                text = self._fetch_once()
                self._save(text)
                logger.info(
                    "[%d/%d] ✅ Transcript fetched — %d characters",
                    idx, len(attempts), len(text),
                )
                return text

            except TranscriptsDisabled as e:
                logger.warning("[%d/%d] Transcripts disabled: %s", idx, len(attempts), e)
                raise  # no point retrying

            except VideoUnavailable as e:
                last_error = e
                logger.warning("[%d/%d] Video unavailable: %s", idx, len(attempts), e)

            except NoTranscriptFound as e:
                last_error = e
                logger.warning("[%d/%d] No transcript yet: %s", idx, len(attempts), e)

            except KeyboardInterrupt:
                logger.warning("Interrupted by user.")
                raise

            except Exception as e:
                last_error = e
                logger.exception("[%d/%d] Unexpected error: %s", idx, len(attempts), e)

            if idx < len(attempts):
                # attempts is 0-based while idx is 1-based, so attempts[idx]
                # is the *next* attempt's (key, config) pair.
                next_cfg = attempts[idx][1]
                logger.info(
                    "[%d/%d] Will retry in %s (%s)",
                    idx, len(attempts),
                    _format_duration(next_cfg["wait_before"]),
                    next_cfg["description"],
                )
            else:
                logger.error("All %d polling attempts exhausted.", len(attempts))

        # Report the time actually spent waiting under the active schedule
        # instead of a fixed figure that is wrong for custom configurations.
        total_wait = sum(cfg["wait_before"] for _, cfg in attempts)
        raise RuntimeError(
            f"Transcript not available after {len(attempts)} attempts "
            f"(~{_format_duration(total_wait)} of polling). "
            f"Video ID: {self.video_id}"
        ) from last_error
264
+
265
+
266
+ # ============================================================================
267
+ # GEMINI SUMMARIZER
268
+ # ============================================================================
269
+
270
class GeminiSummarizer:
    """Sends a transcript to Gemini with model fallback and per-model retry.

    Models are tried in order. For each model, transient server errors are
    retried with exponential backoff; quota / not-found errors skip straight
    to the next model; any other error is raised immediately.
    """

    # Retry config
    MAX_RETRIES = 5
    BASE_WAIT = 10    # seconds before the first retry
    MAX_WAIT = 120    # cap, in seconds, on the doubled wait

    # Substrings of the error text -> retry the same model with backoff
    RETRYABLE = ["503", "502", "500", "UNAVAILABLE", "SERVICE_UNAVAILABLE"]
    # Substrings of the error text -> skip to the next model immediately
    SKIP_TO_NEXT = ["429", "RESOURCE_EXHAUSTED", "quota", "404", "NOT_FOUND"]

    def __init__(
        self,
        api_key: Optional[str] = None,
        models: list = None,
        summary_file: Optional[Path] = None,
        qa_file: Optional[Path] = None,
    ):
        """Create the Gemini client and remember where outputs are written.

        Args:
            api_key: Gemini API key; defaults to ``GEMINI_API_KEY``.
            models: Model names in the order to try; defaults to
                ``GEMINI_MODELS``.
            summary_file: Output path for the summary; defaults to
                ``SUMMARY_FILE``.
            qa_file: Output path for the Q&A text; defaults to ``QA_FILE``.

        Raises:
            ValueError: If no API key is configured.
            ImportError: If the ``google.genai`` package is not installed.
        """
        # Imported here because the module-level import is commented out,
        # which made every construction fail with NameError on ``genai``.
        from google import genai

        resolved_key = GEMINI_API_KEY if api_key is None else api_key
        if not resolved_key:
            raise ValueError(
                "Gemini API key is not configured; pass api_key or set GEMINI_API_KEY."
            )

        self.client = genai.Client(api_key=resolved_key)
        self.models = models or GEMINI_MODELS
        self.summary_file = Path(SUMMARY_FILE if summary_file is None else summary_file)
        self.qa_file = Path(QA_FILE if qa_file is None else qa_file)

    def _call_api(self, transcript: str) -> tuple[str, str]:
        """Request a summary, falling back across models.

        Try each model in order. Per model: retry up to ``MAX_RETRIES`` times
        on transient errors, doubling the wait each time up to ``MAX_WAIT``.

        Returns:
            ``(response_text, model_used)``.

        Raises:
            RuntimeError: When every model is exhausted.
            Exception: Any error matching neither ``RETRYABLE`` nor
                ``SKIP_TO_NEXT`` is re-raised unchanged.
        """
        overall_last_error = None

        for model in self.models:
            logger.info("── Trying model: %s", model)
            wait = self.BASE_WAIT
            last_err = None

            for attempt in range(1, self.MAX_RETRIES + 1):
                try:
                    logger.info(" [%d/%d] Sending request...", attempt, self.MAX_RETRIES)
                    response = self.client.models.generate_content(
                        model=model,
                        contents=transcript,
                        config={"system_instruction": SYSTEM_PROMPT},
                    )
                except Exception as e:
                    err = str(e)
                    last_err = e

                    if any(k in err for k in self.SKIP_TO_NEXT):
                        logger.warning(
                            " [%d/%d] %s — quota/not-found, skipping to next model.",
                            attempt, self.MAX_RETRIES, model,
                        )
                        break  # skip to next model

                    if any(k in err for k in self.RETRYABLE):
                        if attempt < self.MAX_RETRIES:
                            logger.warning(
                                " [%d/%d] %s — transient error. "
                                "Retrying in %ds...",
                                attempt, self.MAX_RETRIES, model, wait,
                            )
                            time.sleep(wait)
                            wait = min(wait * 2, self.MAX_WAIT)
                        else:
                            logger.warning(
                                " [%d/%d] %s — max retries reached, "
                                "trying next model.",
                                attempt, self.MAX_RETRIES, model,
                            )
                        continue

                    logger.error(
                        " [%d/%d] %s — unhandled error: %s",
                        attempt, self.MAX_RETRIES, model, err,
                    )
                    raise

                text = response.text
                if text:
                    logger.info(
                        "✅ Response received from: %s (attempt %d)",
                        model, attempt,
                    )
                    return text, model

                # A response without text would crash the splitter later
                # (re.split on None), so treat it as this model's failure.
                last_err = RuntimeError(f"{model} returned an empty response")
                logger.warning(
                    " [%d/%d] %s — empty response, trying next model.",
                    attempt, self.MAX_RETRIES, model,
                )
                break

            overall_last_error = last_err

        raise RuntimeError(
            f"All models and retries exhausted. Last error: {overall_last_error}"
        ) from overall_last_error

    @staticmethod
    def _split(full_text: str) -> tuple[str, str]:
        """Split model output into ``(summary, qa)`` at the separator line.

        The separator is a line holding only ``!!!!!``; a line holding only
        ``!!!`` is accepted as a fallback. Empty segments are ignored, so
        output that *starts* with a separator still yields the summary first
        rather than an empty string. Segments after the first are joined with
        a blank line.

        Returns:
            ``(summary, qa)``; ``qa`` is ``""`` when there is nothing after
            the summary, and both are ``""`` for empty or ``None`` input.
        """
        text = full_text or ""
        for pattern in (r"^\s*!{5}\s*$", r"^\s*!{3}\s*$"):
            pieces = re.split(pattern, text, flags=re.MULTILINE)
            if len(pieces) < 2:
                continue  # this separator does not occur; try the fallback
            segments = [piece.strip() for piece in pieces if piece.strip()]
            if not segments:
                return "", ""
            return segments[0], "\n\n".join(segments[1:])
        return text.strip(), ""

    def run(self, transcript: str) -> tuple[str, str, str]:
        """Summarize ``transcript`` and save the summary and Q&A to disk.

        Returns:
            ``(summary, qa, model_used)``.
        """
        full, model_used = self._call_api(transcript)
        summary, qa = self._split(full)

        # Custom output paths may point into folders that do not exist yet.
        self.summary_file.parent.mkdir(parents=True, exist_ok=True)
        self.qa_file.parent.mkdir(parents=True, exist_ok=True)
        self.summary_file.write_text(summary, encoding="utf-8")
        self.qa_file.write_text(qa, encoding="utf-8")

        logger.info("Summary saved → %s", self.summary_file)
        logger.info("Q&A saved → %s", self.qa_file)

        return summary, qa, model_used
381
+
382
+
383
+ # ============================================================================
384
+ # PIPELINE
385
+ # ============================================================================
386
+
387
class TranscriptSummaryPipeline:
    """Bundles a transcript fetcher and a Gemini summarizer for one video."""

    def __init__(
        self,
        youtube_url: str,
        languages: Optional[List[str]] = None,
        polling_config: dict = None,
    ):
        """Build the fetcher and summarizer and look up the video title.

        Args:
            youtube_url: Video to process.
            languages: Transcript language preference, passed to the fetcher.
            polling_config: Retry schedule, passed to the fetcher.
        """
        self.youtube_url = youtube_url

        fetcher_options = dict(
            youtube_url=youtube_url,
            output_file=TRANSCRIPT_FILE,
            languages=languages,
            polling_config=polling_config,
        )
        self.fetcher = YouTubeTranscriptFetcher(**fetcher_options)
        self.summarizer = GeminiSummarizer()

        # The fetcher has already parsed the URL; reuse its video ID.
        self.video_id = self.fetcher.video_id
        self.video_title = fetch_video_title(self.video_id)

    def run(self) -> dict:
        """Fetch the transcript, summarize it, and return everything.

        Returns:
            A dict with ``video_id``, ``video_title``, ``model_used``,
            ``summary``, ``qa`` and ``transcript``.
        """
        logger.info("=== Pipeline started ===")
        logger.info("Video title : %s", self.video_title)

        transcript_text = self.fetcher.run()
        summary_text, qa_text, model_name = self.summarizer.run(transcript_text)

        logger.info("=== Pipeline complete | model: %s ===", model_name)

        outcome = {}
        outcome["video_id"] = self.video_id
        outcome["video_title"] = self.video_title
        outcome["model_used"] = model_name
        outcome["summary"] = summary_text
        outcome["qa"] = qa_text
        outcome["transcript"] = transcript_text
        return outcome
423
+
424
+
425
+ # ============================================================================
426
+ # CLI
427
+ # ============================================================================
428
+
429
def main():
    """Command-line entry point: run the pipeline for one YouTube URL.

    Expects the URL as the first argument. Prints every result field except
    the bulky ``summary``, ``qa`` and ``transcript`` texts. Exits with
    status 1 and a usage message when the URL is missing.
    """
    if len(sys.argv) < 2:
        # Name the script as it was actually invoked; the old message pointed
        # at a file name ("gemini.py") that does not exist.
        script_name = Path(sys.argv[0]).name or "gemini_transcript.py"
        print(f"Usage: python {script_name} <youtube_url>", file=sys.stderr)
        sys.exit(1)

    pipeline = TranscriptSummaryPipeline(
        youtube_url=sys.argv[1],
        languages=["en", "en-US", "en-GB"],
    )
    result = pipeline.run()
    for key, value in result.items():
        if key not in ("summary", "qa", "transcript"):
            print(f"{key}: {value}")
442
+
443
+
444
+ if __name__ == "__main__":
445
+ main()
index.html ADDED
@@ -0,0 +1,378 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ <!DOCTYPE html>
2
+ <html lang="en">
3
+ <head>
4
+ <meta charset="UTF-8" />
5
+ <meta name="viewport" content="width=device-width, initial-scale=1.0" />
6
+ <title>YT Summariser</title>
7
+ <link rel="stylesheet" href="https://cdn.jsdelivr.net/npm/@tabler/icons-webfont@latest/tabler-icons.min.css" />
8
+ <style>
9
+ *, *::before, *::after { box-sizing: border-box; margin: 0; padding: 0; }
10
+ body { font-family: system-ui, sans-serif; background: #f9f9f8; color: #1a1a18; min-height: 100vh; display: flex; align-items: center; justify-content: center; }
11
+ .wrap { width: 100%; max-width: 460px; padding: 2.5rem 1.5rem; }
12
+
13
+ /* ── Logo ── */
14
+ .logo { display: flex; align-items: center; gap: 10px; margin-bottom: 2.5rem; }
15
+ .logo-icon { width: 36px; height: 36px; background: #E24B4A; border-radius: 8px; display: flex; align-items: center; justify-content: center; }
16
+ .logo-icon i { color: #fff; font-size: 18px; }
17
+ .logo-text { font-size: 15px; font-weight: 600; }
18
+ .logo-sub { font-size: 12px; color: #888; }
19
+
20
+ /* ── Auth banner ── */
21
+ .auth-banner {
22
+ display: flex; align-items: center; justify-content: space-between;
23
+ padding: 10px 14px; border-radius: 8px; margin-bottom: 1.5rem;
24
+ font-size: 13px; border: 1px solid #ddd; background: #fff;
25
+ }
26
+ .auth-banner.ok { border-color: #86efac; background: #f0fdf4; }
27
+ .auth-banner.bad { border-color: #fca5a5; background: #fff5f5; }
28
+ .auth-left { display: flex; align-items: center; gap: 8px; }
29
+ .auth-dot { width: 8px; height: 8px; border-radius: 50%; flex-shrink: 0; }
30
+ .auth-dot.ok { background: #22c55e; }
31
+ .auth-dot.bad { background: #ef4444; }
32
+ .auth-text { color: #444; }
33
+ .auth-btn {
34
+ font-size: 12px; font-weight: 600; padding: 5px 12px;
35
+ border-radius: 6px; border: none; cursor: pointer;
36
+ background: #1a1a18; color: #fff; text-decoration: none;
37
+ display: inline-flex; align-items: center; gap: 5px;
38
+ transition: opacity .15s;
39
+ }
40
+ .auth-btn:hover { opacity: .8; }
41
+ .auth-btn.connected { background: #dcfce7; color: #166534; cursor: default; }
42
+
43
+ /* ── Heading ── */
44
+ h1 { font-size: 22px; font-weight: 600; margin-bottom: 6px; }
45
+ .subtitle { font-size: 14px; color: #666; margin-bottom: 2rem; line-height: 1.6; }
46
+
47
+ /* ── Form ── */
48
+ .field { margin-bottom: 1rem; }
49
+ label { display: block; font-size: 13px; color: #555; margin-bottom: 5px; }
50
+ input[type=text], input[type=email] {
51
+ width: 100%; padding: 9px 12px; font-size: 14px;
52
+ border: 1px solid #ddd; border-radius: 8px; background: #fff;
53
+ outline: none; transition: border-color .15s;
54
+ }
55
+ input:focus { border-color: #E24B4A; }
56
+
57
+ /* ── Buttons ── */
58
+ .btn {
59
+ width: 100%; padding: 10px; font-size: 14px; font-weight: 600;
60
+ cursor: pointer; border-radius: 8px; border: none;
61
+ background: #E24B4A; color: #fff; margin-top: .5rem;
62
+ display: flex; align-items: center; justify-content: center; gap: 8px;
63
+ transition: opacity .15s;
64
+ }
65
+ .btn:hover { opacity: .88; }
66
+ .btn:disabled { opacity: .5; cursor: not-allowed; }
67
+
68
+ /* ── Error ── */
69
+ .err {
70
+ font-size: 13px; color: #a32d2d; margin-top: 1rem; display: none;
71
+ padding: 10px 12px; border: 1px solid #f09595; border-radius: 8px;
72
+ background: #fcebeb;
73
+ }
74
+
75
+ /* ── Status box ── */
76
+ .status-box {
77
+ margin-top: 1.5rem; border: 1px solid #e5e5e3;
78
+ border-radius: 12px; overflow: hidden; display: none; background: #fff;
79
+ }
80
+ .status-header {
81
+ padding: 12px 16px; display: flex; align-items: center;
82
+ justify-content: space-between; border-bottom: 1px solid #e5e5e3;
83
+ }
84
+ .status-label { font-size: 13px; font-weight: 600; }
85
+ .badge { font-size: 11px; padding: 3px 8px; border-radius: 20px; font-weight: 600; }
86
+ .badge-running { background: #dbeafe; color: #1e40af; }
87
+ .badge-done { background: #dcfce7; color: #166534; }
88
+ .badge-failed { background: #fee2e2; color: #991b1b; }
89
+
90
+ /* ── Steps ── */
91
+ .steps { padding: 12px 16px; display: flex; flex-direction: column; gap: 9px; }
92
+ .step { display: flex; align-items: center; gap: 10px; font-size: 13px; color: #999; }
93
+ .step.active { color: #1a1a18; }
94
+ .step-icon {
95
+ width: 20px; height: 20px; border-radius: 50%;
96
+ display: flex; align-items: center; justify-content: center;
97
+ flex-shrink: 0; font-size: 11px; border: 1px solid #ddd; background: #f5f5f4;
98
+ }
99
+ .step-icon.done { background: #dcfce7; border-color: #86efac; color: #166534; }
100
+ .step-icon.running { background: #dbeafe; border-color: #93c5fd; color: #1e40af; }
101
+ .step-icon.failed { background: #fee2e2; border-color: #fca5a5; color: #991b1b; }
102
+
103
+ /* ── Result links ── */
104
+ .result-box {
105
+ padding: 12px 16px; border-top: 1px solid #e5e5e3;
106
+ display: none; flex-direction: column; gap: 8px;
107
+ }
108
+ .result-link { display: flex; align-items: center; gap: 8px; font-size: 13px; }
109
+ .result-link a { color: #1e40af; text-decoration: none; }
110
+ .result-link a:hover { text-decoration: underline; }
111
+ .result-note { font-size: 12px; color: #888; margin-top: 2px; }
112
+
113
+ /* ── Spinner ── */
114
+ .spinner {
115
+ width: 11px; height: 11px; border: 1.5px solid currentColor;
116
+ border-top-color: transparent; border-radius: 50%;
117
+ animation: spin .7s linear infinite; display: inline-block;
118
+ }
119
+ @keyframes spin { to { transform: rotate(360deg); } }
120
+ </style>
121
+ </head>
122
+ <body>
123
+ <div class="wrap">
124
+
125
+ <!-- Logo -->
126
+ <div class="logo">
127
+ <div class="logo-icon"><i class="ti ti-brand-youtube"></i></div>
128
+ <div>
129
+ <div class="logo-text">YT Summariser</div>
130
+ <div class="logo-sub">Transcript → Summary → Q&amp;A</div>
131
+ </div>
132
+ </div>
133
+
134
+ <!-- Auth banner -->
135
+ <div class="auth-banner bad" id="auth-banner">
136
+ <div class="auth-left">
137
+ <div class="auth-dot bad" id="auth-dot"></div>
138
+ <span class="auth-text" id="auth-text">Google not connected</span>
139
+ </div>
140
+ <a href="/auth/start" class="auth-btn" id="auth-btn" target="_blank" onclick="onAuthClick()">
141
+ <i class="ti ti-brand-google"></i> Connect Google
142
+ </a>
143
+ </div>
144
+
145
+ <!-- Heading -->
146
+ <h1>Summarise a YouTube video</h1>
147
+ <p class="subtitle">Paste a YouTube link and your email — we'll process the transcript and send results to your inbox.</p>
148
+
149
+ <!-- Form -->
150
+ <div class="field">
151
+ <label for="yt-url">YouTube URL</label>
152
+ <input type="text" id="yt-url" placeholder="https://www.youtube.com/watch?v=..." />
153
+ </div>
154
+ <div class="field">
155
+ <label for="email">Email address</label>
156
+ <input type="email" id="email" placeholder="you@example.com" />
157
+ </div>
158
+
159
+ <button class="btn" id="submit-btn" onclick="submitJob()">
160
+ <i class="ti ti-player-play"></i> Start processing
161
+ </button>
162
+
163
+ <div class="err" id="err-box"></div>
164
+
165
+ <!-- Status box -->
166
+ <div class="status-box" id="status-box">
167
+ <div class="status-header">
168
+ <span class="status-label" id="status-label">Processing…</span>
169
+ <span class="badge badge-running" id="status-badge">Running</span>
170
+ </div>
171
+ <div class="steps" id="steps-list"></div>
172
+ <div class="result-box" id="result-box"></div>
173
+ </div>
174
+
175
+ </div>
176
+
177
+ <script>
178
+ /* ── Step config ── */
179
/* Pipeline steps in display order: [step key, label shown in the UI, icon class]. */
const STEP_DEFS = [
  ['fetch_transcript', 'Fetching transcript', 'ti-file-text'],
  ['summarize', 'Summarising with Gemini', 'ti-brain'],
  ['create_drive_folder', 'Creating Drive folder', 'ti-folder-plus'],
  ['upload_summary', 'Uploading summary', 'ti-upload'],
  ['upload_qa', 'Uploading Q&A', 'ti-upload'],
  ['upload_transcript', 'Uploading transcript', 'ti-upload'],
  ['send_email', 'Sending email', 'ti-mail'],
  ['log_sheet', 'Logging to Sheets', 'ti-table'],
];

/* Step key -> label rendered in the progress list. */
const STEP_LABELS = Object.fromEntries(STEP_DEFS.map(([key, label]) => [key, label]));

/* Step key -> icon class rendered next to the label. */
const STEP_ICONS = Object.fromEntries(STEP_DEFS.map(([key, , icon]) => [key, icon]));

/* Handle of the job-status polling interval; assigned by startPolling(). */
let pollTimer = null;
201
+
202
+ /* ── Auth status check ── */
203
/**
 * Ask the server whether Google is connected and update the auth banner.
 * Any failure (network, invalid JSON, banner update) shows the
 * "not connected" state.
 */
async function checkAuth() {
  try {
    const response = await fetch('/auth/status');
    const { authenticated } = await response.json();
    setAuthBanner(authenticated);
  } catch {
    setAuthBanner(false);
  }
}
212
+
213
/**
 * Switch the auth banner between its "connected" and "not connected" looks.
 * @param {*} ok - truthy when Google is connected.
 */
function setAuthBanner(ok) {
  const byId = (id) => document.getElementById(id);
  const banner = byId('auth-banner');
  const dot = byId('auth-dot');
  const text = byId('auth-text');
  const btn = byId('auth-btn');

  // Banner and dot share the same state suffix.
  const state = ok ? 'ok' : 'bad';
  banner.className = `auth-banner ${state}`;
  dot.className = `auth-dot ${state}`;

  if (!ok) {
    // Not connected: the button is a live link that starts the OAuth flow.
    text.textContent = 'Google not connected';
    btn.className = 'auth-btn';
    btn.innerHTML = '<i class="ti ti-brand-google"></i> Connect Google';
    btn.href = '/auth/start';
    btn.onclick = onAuthClick;
    return;
  }

  // Connected: turn the button into an inert status pill.
  text.textContent = 'Google configured';
  btn.className = 'auth-btn connected';
  btn.innerHTML = '<i class="ti ti-circle-check"></i> Connected';
  btn.removeAttribute('href');
  btn.onclick = null;
}
237
+
238
/* True while an auth-status poller started by onAuthClick() is pending or running. */
let authPollActive = false;

/**
 * Click handler for the "Connect Google" link.
 *
 * Waits 3 s, then asks /auth/status every 2 s until the server reports
 * `authenticated`, giving up after 2 minutes. Repeated clicks while a poller
 * is active do not start another one, and a failed status request is logged
 * and retried on the next tick instead of surfacing as an unhandled rejection.
 */
function onAuthClick() {
  if (authPollActive) return;
  authPollActive = true;

  setTimeout(() => {
    // Stops both timers; only ever invoked after `check` and `giveUp` exist.
    const stopChecking = () => {
      clearInterval(check);
      clearTimeout(giveUp);
      authPollActive = false;
    };

    const check = setInterval(async () => {
      try {
        const res = await fetch('/auth/status');
        const data = await res.json();
        if (data.authenticated) {
          stopChecking();
          setAuthBanner(true);
        }
      } catch (err) {
        // Transient failure (network error, non-JSON body): keep polling.
        console.warn('Auth status check failed; retrying', err);
      }
    }, 2000);

    // Stop checking after 2 min
    const giveUp = setTimeout(stopChecking, 120000);
  }, 3000);
}
250
+
251
+ /* ── Helpers ── */
252
/**
 * Show `msg` in the error box below the form.
 * @param {string} msg - text to display (assigned as plain text, not HTML).
 */
function showErr(msg) {
  const errBox = document.getElementById('err-box');
  errBox.textContent = msg;
  errBox.style.display = 'block';
}
256
/** Hide the error box; its text is left in place. */
function clearErr() {
  const errBox = document.getElementById('err-box');
  errBox.style.display = 'none';
}
257
+
258
+ /* ── Submit ── */
259
/**
 * Validate the form, POST the job to /generate and start polling its status.
 * The submit button is disabled while the request is in flight and restored
 * whenever the job could not be started.
 */
async function submitJob() {
  clearErr();

  const url = document.getElementById('yt-url').value.trim();
  const email = document.getElementById('email').value.trim();
  if (!url) {
    showErr('Please enter a YouTube URL.');
    return;
  }
  if (!email) {
    showErr('Please enter an email address.');
    return;
  }

  const btn = document.getElementById('submit-btn');
  // Puts the button back into its initial, clickable state.
  const restoreButton = () => {
    btn.disabled = false;
    btn.innerHTML = '<i class="ti ti-player-play"></i> Start processing';
  };

  btn.disabled = true;
  btn.innerHTML = '<span class="spinner"></span> Starting…';

  try {
    const payload = JSON.stringify({ youtube_url: url, email_to: email });
    const res = await fetch('/generate', {
      method: 'POST',
      headers: { 'Content-Type': 'application/json' },
      body: payload,
    });
    const data = await res.json();

    if (res.ok) {
      // The button stays disabled until polling reports a final state.
      startPolling(data.job_id);
    } else {
      showErr(data.detail || 'Request failed.');
      restoreButton();
    }
  } catch (e) {
    showErr('Could not reach the server. Is it running on localhost:8000?');
    restoreButton();
  }
}
290
+
291
+ /* ── Polling ── */
292
/**
 * Show the status box for a freshly submitted job and poll it every 2.5 s.
 *
 * Any poller still registered from an earlier job is stopped first, and the
 * label, badge and result links left over from that job are reset so the
 * box never shows a previous job's outcome next to the new job's progress.
 *
 * @param {string} jobId - id returned by POST /generate.
 */
function startPolling(jobId) {
  // clearInterval ignores null and already-cleared ids, so this is always safe.
  clearInterval(pollTimer);

  const resultBox = document.getElementById('result-box');
  resultBox.style.display = 'none';
  resultBox.innerHTML = '';
  document.getElementById('status-label').textContent = 'Processing…';
  updateBadge('running');

  document.getElementById('status-box').style.display = 'block';
  renderSteps({});
  pollTimer = setInterval(() => poll(jobId), 2500);
}
297
+
298
/**
 * Fetch the current status of a job once and reflect it in the status box.
 *
 * - Running: the step list and badge are refreshed and the label names the
 *   running step (falling back to the raw step key when it has no label).
 * - Completed or failed: polling stops, the submit button is re-enabled, and
 *   the result links or the error message are shown.
 * - HTTP 404: the server does not know the job, so polling stops with an
 *   error instead of showing "Running" forever.
 * - Any other failure (network error, other non-OK status, invalid JSON) is
 *   logged and retried on the next tick.
 *
 * @param {string} jobId - id of the job being tracked.
 */
async function poll(jobId) {
  // Stops the interval, sets the final label and re-enables the submit button.
  const finish = (labelText, buttonText) => {
    clearInterval(pollTimer);
    document.getElementById('status-label').textContent = labelText;
    const btn = document.getElementById('submit-btn');
    btn.disabled = false;
    btn.innerHTML = `<i class="ti ti-player-play"></i> ${buttonText}`;
  };

  try {
    const res = await fetch(`/status/${encodeURIComponent(jobId)}`);
    if (res.status === 404) {
      updateBadge('failed');
      finish('Failed', 'Try again');
      showErr('Job not found on the server. Please submit it again.');
      return;
    }
    if (!res.ok) {
      throw new Error(`Status request failed with HTTP ${res.status}`);
    }

    const data = await res.json();
    const steps = data.steps || {};
    renderSteps(steps);
    updateBadge(data.status);

    const running = Object.entries(steps).find(([, state]) => state === 'running');
    if (running) {
      const [stepKey] = running;
      document.getElementById('status-label').textContent = `${STEP_LABELS[stepKey] ?? stepKey}…`;
    }

    if (data.status === 'completed') {
      // Re-enable the button before rendering links so a rendering error
      // cannot leave the form locked.
      finish('Done!', 'Process another');
      showResult(data.result);
    } else if (data.status === 'failed') {
      finish('Failed', 'Try again');
      showErr('Pipeline failed: ' + (data.error || 'unknown error'));
    }
  } catch (err) {
    // Best-effort polling: report the problem and let the next tick retry.
    console.warn('Status poll failed; will retry on the next tick', err);
  }
}
325
+
326
+ /* ── Render steps ── */
327
/**
 * Re-render the whole step list from a `{ stepKey: state }` map.
 * Steps missing from the map are drawn as 'pending'; steps are listed in
 * STEP_LABELS order.
 * @param {Object} steps - state per step key, as reported by the status endpoint.
 */
function renderSteps(steps) {
  // Marker drawn inside the round status icon; other states get no marker.
  const markers = new Map([
    ['done', '<i class="ti ti-check"></i>'],
    ['running', '<span class="spinner"></span>'],
    ['failed', '<i class="ti ti-x"></i>'],
  ]);

  const rows = [];
  for (const [key, label] of Object.entries(STEP_LABELS)) {
    const state = steps[key] || 'pending';
    const iconCls = STEP_ICONS[key] || 'ti-circle';
    const inner = markers.get(state) ?? '';
    const activeCls = state !== 'pending' ? 'active' : '';
    rows.push(`<div class="step ${activeCls}">
        <div class="step-icon ${state}">${inner}</div>
        <i class="ti ${iconCls}" style="font-size:14px;color:#aaa"></i>
        <span>${label}</span>
      </div>`);
  }

  document.getElementById('steps-list').innerHTML = rows.join('');
}
343
+
344
+ function updateBadge(status) {
345
+ const b = document.getElementById('status-badge');
346
+ b.className = 'badge';
347
+ if (status === 'completed') { b.classList.add('badge-done'); b.textContent = 'Completed'; }
348
+ else if (status === 'failed') { b.classList.add('badge-failed'); b.textContent = 'Failed'; }
349
+ else { b.classList.add('badge-running'); b.textContent = 'Running'; }
350
+ }
351
+
352
+ /* ── Show result links ── */
353
/**
 * Render links to the uploaded Summary, Q&A and Transcript files.
 *
 * The links come from the server response, so each URL is only rendered when
 * it is an http(s) URL, and it is HTML-escaped before being placed in the
 * `href` attribute. Links open in a new tab with `rel="noopener noreferrer"`
 * so the opened page gets no handle on this window.
 *
 * @param {Object} [result] - job result; link URLs are read from
 *   `result.drive.{summary,qa,transcript}.web_view_link`. Nothing happens
 *   when `result` is falsy.
 */
function showResult(result) {
  if (!result) return;

  const HTML_ESCAPES = { '&': '&amp;', '<': '&lt;', '>': '&gt;', '"': '&quot;', "'": '&#39;' };
  const escapeHtml = (value) => String(value).replace(/[&<>"']/g, (ch) => HTML_ESCAPES[ch]);
  // Rejects missing links and non-web schemes such as `javascript:`.
  const isWebUrl = (value) => typeof value === 'string' && /^https?:\/\//i.test(value);

  const box = document.getElementById('result-box');
  box.style.display = 'flex';
  const drive = result.drive || {};
  const links = [
    ['ti-file-text', 'Summary', drive.summary?.web_view_link],
    ['ti-help-circle', 'Q&amp;A', drive.qa?.web_view_link],
    ['ti-align-left', 'Transcript', drive.transcript?.web_view_link],
  ];
  box.innerHTML = links
    .filter(([, , u]) => isWebUrl(u))
    .map(([icon, label, u]) => `
      <div class="result-link">
        <i class="ti ${icon}" style="font-size:15px;color:#888"></i>
        <a href="${escapeHtml(u)}" target="_blank" rel="noopener noreferrer">${label} <i class="ti ti-external-link" style="font-size:11px"></i></a>
      </div>`)
    .join('') +
    `<p class="result-note"><i class="ti ti-mail" style="font-size:13px;vertical-align:-2px"></i> Results also sent to your email</p>`;
}
373
+
374
+ /* ── Init ── */
375
+ checkAuth();
376
+ </script>
377
+ </body>
378
+ </html>
requirements.txt ADDED
@@ -0,0 +1,7 @@
 
 
 
 
 
 
 
 
1
+ fastapi
2
+ uvicorn[standard]
3
+ google-api-python-client
4
+ google-auth-httplib2
5
+ google-auth-oauthlib
6
+ requests
7
+ youtube_transcript_api