hamza2923 committed on
Commit
ef6bd0d
·
verified ·
1 Parent(s): d42a2ce

Delete main.py

Browse files
Files changed (1) hide show
  1. main.py +0 -189
main.py DELETED
@@ -1,189 +0,0 @@
1
- from fastapi import FastAPI, HTTPException
2
- from fastapi.middleware.cors import CORSMiddleware
3
- from pydantic import BaseModel
4
- from selenium import webdriver
5
- from selenium.webdriver.chrome.service import Service
6
- from selenium.webdriver.chrome.options import Options
7
- from selenium.webdriver.common.by import By
8
- from selenium.webdriver.support.ui import WebDriverWait
9
- from selenium.webdriver.support import expected_conditions as EC
10
- from selenium.common.exceptions import TimeoutException, WebDriverException
11
- import time
12
- import logging
13
- import os
14
- import shutil
15
- from pathlib import Path
16
-
17
-
18
# FastAPI application instance used by the route decorators below.
app = FastAPI()

# CORS policy: every origin, method and header is accepted.
# NOTE(review): a wildcard origin combined with allow_credentials=True is
# discouraged by the FastAPI CORS documentation -- confirm that credentialed
# cross-origin requests are really required.
_cors_policy = dict(
    allow_origins=["*"],
    allow_credentials=True,
    allow_methods=["*"],
    allow_headers=["*"],
)
app.add_middleware(CORSMiddleware, **_cors_policy)

# Logging: INFO level on the root logger, plus a module-scoped logger.
logging.basicConfig(level=logging.INFO)
logger = logging.getLogger(__name__)
32
-
33
class VideoRequest(BaseModel):
    """Request body for the transcript endpoint."""

    # Address of the video page to extract a transcript from.
    url: str
35
-
36
class TranscriptResponse(BaseModel):
    """Response body returned by the transcript endpoint."""

    # True when a transcript was extracted, False when extraction failed.
    success: bool
    # Transcript text, one entry per segment; None on failure.
    transcript: list[str] | None
    # Human-readable failure description; None on success.
    error: str | None
    # Seconds elapsed while handling the request.
    processing_time: float
41
-
42
def init_driver():
    """Create a headless Chrome WebDriver.

    Searches a fixed list of locations for a Chrome/Chromium binary, checks
    that ChromeDriver exists at ``/usr/bin/chromedriver`` and then starts a
    headless browser session.

    Returns:
        A started ``webdriver.Chrome`` instance. The caller is responsible
        for calling ``quit()`` on it.

    Raises:
        RuntimeError: if no Chrome binary is found, if ChromeDriver is
            missing, or if the browser session cannot be started.
    """
    options = Options()
    for flag in (
        "--headless=new",
        "--no-sandbox",
        "--disable-dev-shm-usage",
        "--disable-gpu",
    ):
        options.add_argument(flag)

    # Try multiple possible Chrome binary locations; the first hit wins.
    possible_chrome_paths = [
        "/usr/bin/google-chrome",
        "/usr/bin/google-chrome-stable",
        "/usr/lib/chromium-browser/chrome",
        "/usr/bin/chromium",
    ]
    chrome_path = next(
        (path for path in possible_chrome_paths if os.path.exists(path)),
        None,
    )
    if chrome_path is None:
        message = f"No Chrome binary found in paths: {possible_chrome_paths}"
        logger.error(message)
        raise RuntimeError(message)

    options.binary_location = chrome_path
    logger.info("Using Chrome binary: %s", chrome_path)

    # Checked outside the try block below so this error is reported once,
    # as-is, instead of being caught and re-wrapped by our own handler.
    chromedriver_path = "/usr/bin/chromedriver"
    if not os.path.exists(chromedriver_path):
        message = f"ChromeDriver not found at {chromedriver_path}"
        logger.error(message)
        raise RuntimeError(message)

    try:
        service = Service(executable_path=chromedriver_path)
        driver = webdriver.Chrome(service=service, options=options)
    except Exception as e:
        logger.error("Driver initialization failed: %s", e)
        # Chain the original exception so the root cause stays in the traceback.
        raise RuntimeError(f"Driver initialization failed: {e}") from e

    logger.info("ChromeDriver initialized successfully")
    return driver
82
-
83
def _is_youtube_url(url: str) -> bool:
    """Return True when *url* is an http(s) URL whose host is YouTube."""
    from urllib.parse import urlparse

    try:
        parsed = urlparse(url)
        host = (parsed.hostname or "").lower()
    except ValueError:
        # Malformed URLs (e.g. broken IPv6 literals) are simply invalid.
        return False
    if parsed.scheme not in ("http", "https"):
        return False
    return host in ("youtube.com", "youtu.be") or host.endswith(".youtube.com")


def _scrape_transcript(video_url: str) -> list[str]:
    """Open *video_url* in headless Chrome and return the transcript lines.

    Blocking: drives a real browser via Selenium. The driver is always quit
    before returning or raising.
    """
    driver = init_driver()
    try:
        logger.info("Processing URL: %s", video_url)
        driver.get(video_url)

        # Handle cookie consent if it appears
        try:
            cookie_button = WebDriverWait(driver, 5).until(
                EC.element_to_be_clickable(
                    (By.XPATH, "//*[contains(text(), 'Accept all')]")
                )
            )
        except TimeoutException:
            logger.info("No cookie consent found")
        else:
            cookie_button.click()
            logger.info("Accepted cookies")

        # Click more button (JS click, as in the original implementation)
        more_button = WebDriverWait(driver, 10).until(
            EC.element_to_be_clickable((By.ID, "expand"))
        )
        driver.execute_script("arguments[0].click();", more_button)

        # Click transcript button
        transcript_button = WebDriverWait(driver, 10).until(
            EC.element_to_be_clickable(
                (By.CSS_SELECTOR, "button[aria-label='Show transcript']")
            )
        )
        driver.execute_script("arguments[0].click();", transcript_button)

        # Wait for transcript
        WebDriverWait(driver, 15).until(
            EC.presence_of_element_located((By.ID, "segments-container"))
        )

        # Extract transcript
        segments = driver.find_elements(
            By.CSS_SELECTOR, "div.ytd-transcript-segment-renderer"
        )
        transcript = []
        for segment in segments:
            try:
                text = segment.find_element(By.CLASS_NAME, "segment-text").text.strip()
            except WebDriverException:
                # Segment without a text node (or gone stale): skip it.
                continue
            if text:
                transcript.append(text)
        return transcript
    finally:
        driver.quit()


@app.post("/transcript", response_model=TranscriptResponse)
async def get_transcript(request: VideoRequest):
    """Return the transcript of the YouTube video named in the request.

    Args:
        request: body carrying the video ``url``.

    Returns:
        A ``TranscriptResponse``. On success ``transcript`` holds the segment
        texts; on a scraping failure ``success`` is False and ``error``
        describes the problem.

    Raises:
        HTTPException: 400 when the URL is not a YouTube URL, 404 when the
            page yields no transcript text. These are raised outside the
            generic error handler so the status codes reach the client.
    """
    import asyncio

    start_time = time.perf_counter()

    video_url = request.url
    if not _is_youtube_url(video_url):
        raise HTTPException(status_code=400, detail="Invalid YouTube URL")

    try:
        # Selenium is blocking; run it in a worker thread so the event loop
        # keeps serving other requests meanwhile.
        transcript = await asyncio.to_thread(_scrape_transcript, video_url)
    except TimeoutException:
        error_msg = "Timed out waiting for page elements - the video might not have transcripts"
        logger.error(error_msg)
        return TranscriptResponse(
            success=False,
            transcript=None,
            error=error_msg,
            processing_time=time.perf_counter() - start_time,
        )
    except Exception as e:
        logger.exception("Error: %s", e)
        return TranscriptResponse(
            success=False,
            transcript=None,
            error=str(e),
            processing_time=time.perf_counter() - start_time,
        )

    if not transcript:
        raise HTTPException(status_code=404, detail="No transcript available")

    return TranscriptResponse(
        success=True,
        transcript=transcript,
        error=None,
        processing_time=time.perf_counter() - start_time,
    )
166
-
167
-
168
-
169
# NOTE: this route is registered on the `app` created at the top of the
# module. Creating a second FastAPI() here would discard the /transcript
# route and the CORS middleware attached to the first instance.
@app.get("/health")
def health_check():
    """Report where Chrome and ChromeDriver are found on PATH.

    Returns:
        A dict with the resolved paths (``None`` when not found) and a
        boolean per binary telling whether that path exists.
    """
    chrome_path = shutil.which("google-chrome")
    chromedriver_path = shutil.which("chromedriver")
    return {
        "ChromePath": chrome_path,
        "ChromeDriverPath": chromedriver_path,
        # Path("") is the current directory and always "exists", so a missing
        # binary must be reported as False explicitly.
        "ChromeExists": chrome_path is not None and Path(chrome_path).exists(),
        "ChromeDriverExists": (
            chromedriver_path is not None and Path(chromedriver_path).exists()
        ),
    }
181
-
182
-
183
@app.get("/")
async def root():
    """Return the static welcome payload served at the API root."""
    welcome = {"message": "Welcome to YouTube Transcript API"}
    return welcome
186
-
187
if __name__ == "__main__":
    # Script entry point: serve the app with uvicorn on all interfaces. The
    # port comes from the PORT environment variable, falling back to 7860.
    import uvicorn

    listen_port = int(os.environ.get("PORT", 7860))
    uvicorn.run(app, host="0.0.0.0", port=listen_port)