hamza2923 committed on
Commit
e236632
·
verified ·
1 Parent(s): 2d8eb22

Delete app.py

Browse files
Files changed (1) hide show
  1. app.py +0 -187
app.py DELETED
@@ -1,187 +0,0 @@
1
- from fastapi import FastAPI, HTTPException
2
- from fastapi.middleware.cors import CORSMiddleware
3
- from pydantic import BaseModel
4
- from selenium import webdriver
5
- from selenium.webdriver.chrome.service import Service
6
- from selenium.webdriver.chrome.options import Options
7
- from selenium.webdriver.common.by import By
8
- from selenium.webdriver.support.ui import WebDriverWait
9
- from selenium.webdriver.support import expected_conditions as EC
10
- from selenium.common.exceptions import TimeoutException
11
- import time
12
- import logging
13
- import os
14
- import shutil
15
- from pathlib import Path
16
-
17
# ASGI application object; the route decorators in this file register on it.
app = FastAPI()

# Configure CORS.
# Any origin may call this API, so credentialed requests are turned off: the
# FastAPI CORS documentation states that allow_origins, allow_methods and
# allow_headers cannot be ["*"] when allow_credentials is True. None of the
# routes in this file read cookies or authorization headers.
app.add_middleware(
    CORSMiddleware,
    allow_origins=["*"],
    allow_credentials=False,
    allow_methods=["*"],
    allow_headers=["*"],
)

# Configure logging: INFO and above on the root logger, plus a module logger
# used by the functions in this file.
logging.basicConfig(level=logging.INFO)
logger = logging.getLogger(__name__)
31
-
32
- # Pydantic models
33
class VideoRequest(BaseModel):
    """Request body accepted by the transcript endpoint."""

    # Address of the video page, exactly as supplied by the client.
    url: str
35
-
36
class TranscriptResponse(BaseModel):
    """Result envelope returned by the transcript endpoint.

    ``transcript`` and ``error`` are nullable and now default to ``None`` so
    that a caller only has to pass the one that applies. Without explicit
    defaults, Pydantic v2 treats ``X | None`` fields as required.
    """

    # True when transcript lines were extracted, False on any failure.
    success: bool
    # Extracted transcript lines; None when extraction failed.
    transcript: list[str] | None = None
    # Human-readable failure description; None on success.
    error: str | None = None
    # Wall-clock seconds spent handling the request.
    processing_time: float
41
-
42
- # Driver init and route handlers here...
43
-
44
def init_driver():
    """Create a headless Chrome WebDriver.

    The Chrome binary is looked up in a list of well-known absolute paths
    first and, if none exists, on ``PATH`` via ``shutil.which``. ChromeDriver
    is looked up at ``/usr/bin/chromedriver`` first and then on ``PATH``.

    Returns:
        A started ``webdriver.Chrome`` instance. The caller is responsible
        for calling ``quit()`` on it.

    Raises:
        Exception: if no Chrome binary can be found, if ChromeDriver cannot
            be found, or if the driver fails to start. The original error is
            attached as ``__cause__`` where there is one.
    """
    options = Options()
    for flag in (
        "--headless=new",
        "--no-sandbox",
        "--disable-dev-shm-usage",
        "--disable-gpu",
    ):
        options.add_argument(flag)

    # Try multiple possible Chrome binary locations
    possible_chrome_paths = [
        "/usr/bin/google-chrome",
        "/usr/bin/google-chrome-stable",
        "/usr/lib/chromium-browser/chrome",
        "/usr/bin/chromium",
    ]
    chrome_path = next(
        (path for path in possible_chrome_paths if os.path.exists(path)),
        None,
    )
    if not chrome_path:
        # Fall back to PATH so installs outside the fixed locations still work.
        for command in (
            "google-chrome",
            "google-chrome-stable",
            "chromium-browser",
            "chromium",
        ):
            chrome_path = shutil.which(command)
            if chrome_path:
                break

    if not chrome_path:
        message = f"No Chrome binary found in paths: {possible_chrome_paths}"
        logger.error(message)
        raise Exception(message)

    options.binary_location = chrome_path
    logger.info(f"Using Chrome binary: {chrome_path}")

    chromedriver_path = "/usr/bin/chromedriver"
    if not os.path.exists(chromedriver_path):
        # Same PATH fallback for the driver executable; keep the default path
        # for the error message when nothing is found.
        chromedriver_path = shutil.which("chromedriver") or chromedriver_path

    try:
        if not os.path.exists(chromedriver_path):
            raise FileNotFoundError(f"ChromeDriver not found at {chromedriver_path}")

        service = Service(executable_path=chromedriver_path)
        driver = webdriver.Chrome(service=service, options=options)
    except Exception as e:
        # Single log entry per failure; chain the cause so the traceback of
        # the underlying error is not lost.
        logger.error(f"Driver initialization failed: {str(e)}")
        raise Exception(f"Driver initialization failed: {str(e)}") from e

    logger.info("ChromeDriver initialized successfully")
    return driver
84
-
85
def _is_youtube_url(url: str) -> bool:
    """Return True when *url* is an http(s) URL whose host is YouTube."""
    from urllib.parse import urlparse

    try:
        parsed = urlparse(url)
        host = parsed.hostname or ""
    except ValueError:
        # urlparse rejects some malformed inputs (e.g. bad IPv6 brackets).
        return False

    if parsed.scheme not in ("http", "https"):
        return False
    return host in ("youtu.be", "youtube.com") or host.endswith(".youtube.com")


def _scrape_transcript(video_url: str) -> list[str]:
    """Drive a browser through the video page and return its transcript lines.

    Blocking; meant to be run in a worker thread. The driver is always quit,
    whether or not scraping succeeds. Selenium ``TimeoutException`` and any
    error from ``init_driver`` propagate to the caller.
    """
    driver = init_driver()
    try:
        logger.info(f"Processing URL: {video_url}")
        driver.get(video_url)

        # Handle cookie consent if it appears
        try:
            cookie_button = WebDriverWait(driver, 5).until(
                EC.element_to_be_clickable((By.XPATH, "//*[contains(text(), 'Accept all')]"))
            )
        except TimeoutException:
            logger.info("No cookie consent found")
        else:
            cookie_button.click()
            logger.info("Accepted cookies")

        # Click more button
        more_button = WebDriverWait(driver, 10).until(
            EC.element_to_be_clickable((By.ID, "expand"))
        )
        driver.execute_script("arguments[0].click();", more_button)

        # Click transcript button
        transcript_button = WebDriverWait(driver, 10).until(
            EC.element_to_be_clickable((By.CSS_SELECTOR, "button[aria-label='Show transcript']"))
        )
        driver.execute_script("arguments[0].click();", transcript_button)

        # Wait for transcript
        WebDriverWait(driver, 15).until(
            EC.presence_of_element_located((By.ID, "segments-container"))
        )

        # Extract transcript
        segments = driver.find_elements(By.CSS_SELECTOR, "div.ytd-transcript-segment-renderer")
        transcript = []
        for segment in segments:
            try:
                text = segment.find_element(By.CLASS_NAME, "segment-text").text.strip()
            except Exception:
                # Best effort: a segment that cannot be read is skipped, but
                # KeyboardInterrupt/SystemExit are no longer swallowed.
                logger.debug("Skipping unreadable transcript segment", exc_info=True)
                continue
            if text:
                transcript.append(text)
        return transcript
    finally:
        driver.quit()


@app.post("/transcript", response_model=TranscriptResponse)
async def get_transcript(request: VideoRequest):
    """Scrape and return the transcript of a YouTube video.

    Args:
        request: body carrying the video ``url``.

    Returns:
        ``TranscriptResponse`` with ``success=True`` and the transcript lines,
        or ``success=False`` and an ``error`` message when scraping times out
        or fails for any other reason.

    Raises:
        HTTPException: 400 when the URL is not an http(s) YouTube URL, 404
            when the page yields no transcript text. These are raised outside
            the generic error handling so the status codes reach the client
            instead of being converted into a 200 response.
    """
    import asyncio

    start_time = time.time()

    video_url = request.url
    # Validate the parsed hostname rather than a substring: the URL is
    # untrusted and is handed to a real browser.
    if not _is_youtube_url(video_url):
        raise HTTPException(status_code=400, detail="Invalid YouTube URL")

    try:
        # Selenium is blocking; run it off the event loop so other requests
        # are still served while the browser works.
        transcript = await asyncio.to_thread(_scrape_transcript, video_url)
    except TimeoutException:
        error_msg = "Timed out waiting for page elements - the video might not have transcripts"
        logger.error(error_msg)
        return TranscriptResponse(
            success=False,
            transcript=None,
            error=error_msg,
            processing_time=time.time() - start_time,
        )
    except Exception as e:
        logger.error(f"Error: {str(e)}")
        return TranscriptResponse(
            success=False,
            transcript=None,
            error=str(e),
            processing_time=time.time() - start_time,
        )

    if not transcript:
        raise HTTPException(status_code=404, detail="No transcript available")

    return TranscriptResponse(
        success=True,
        transcript=transcript,
        error=None,
        processing_time=time.time() - start_time,
    )
168
-
169
@app.get("/health")
def health_check():
    """Report where ``google-chrome`` and ``chromedriver`` resolve on PATH.

    Returns:
        A dict with the resolved paths (``None`` when not found) and two
        booleans saying whether each resolved path exists.
    """
    chrome_path = shutil.which("google-chrome")
    chromedriver_path = shutil.which("chromedriver")
    return {
        "ChromePath": chrome_path,
        "ChromeDriverPath": chromedriver_path,
        # Path("") is the current directory and always exists, so a missing
        # binary must be checked for explicitly instead of via `path or ""`.
        "ChromeExists": chrome_path is not None and Path(chrome_path).exists(),
        "ChromeDriverExists": chromedriver_path is not None
        and Path(chromedriver_path).exists(),
    }
179
-
180
@app.get("/")
async def root():
    """Return the static welcome payload served at the API root."""
    greeting = {"message": "Welcome to YouTube Transcript API"}
    return greeting
183
-
184
if __name__ == "__main__":
    # Start the server only when this file is executed as a script.
    import uvicorn

    # PORT from the environment wins; 7860 is the fallback.
    listen_port = int(os.environ.get("PORT", 7860))
    uvicorn.run(app, host="0.0.0.0", port=listen_port)
187
-