Spaces:
Build error
Build error
Update app.py
Browse files
app.py
CHANGED
|
@@ -8,6 +8,7 @@ from selenium.webdriver.common.by import By
|
|
| 8 |
from selenium.webdriver.support.ui import WebDriverWait
|
| 9 |
from selenium.webdriver.support import expected_conditions as EC
|
| 10 |
from selenium.common.exceptions import TimeoutException, WebDriverException
|
|
|
|
| 11 |
import time
|
| 12 |
import logging
|
| 13 |
import os
|
|
@@ -37,32 +38,19 @@ class TranscriptResponse(BaseModel):
|
|
| 37 |
processing_time: float
|
| 38 |
|
| 39 |
def init_driver():
|
| 40 |
-
from selenium import webdriver
|
| 41 |
-
from selenium.webdriver.chrome.service import Service
|
| 42 |
-
from selenium.webdriver.chrome.options import Options
|
| 43 |
-
|
| 44 |
options = Options()
|
| 45 |
options.add_argument("--headless=new")
|
| 46 |
options.add_argument("--no-sandbox")
|
| 47 |
options.add_argument("--disable-dev-shm-usage")
|
| 48 |
options.add_argument("--disable-gpu")
|
| 49 |
-
|
| 50 |
-
# For Hugging Face Spaces
|
| 51 |
options.binary_location = "/usr/bin/google-chrome"
|
| 52 |
|
| 53 |
try:
|
| 54 |
-
#
|
| 55 |
-
|
| 56 |
-
driver = webdriver.Chrome(service=service, options=options)
|
| 57 |
return driver
|
| 58 |
except Exception as e:
|
| 59 |
-
|
| 60 |
-
# Fallback without service
|
| 61 |
-
driver = webdriver.Chrome(options=options)
|
| 62 |
-
return driver
|
| 63 |
-
except Exception as e:
|
| 64 |
-
raise Exception(f"Both driver initialization methods failed: {str(e)}")
|
| 65 |
-
|
| 66 |
|
| 67 |
@app.post("/transcript", response_model=TranscriptResponse)
|
| 68 |
async def get_transcript(request: VideoRequest):
|
|
@@ -150,37 +138,42 @@ async def get_transcript(request: VideoRequest):
|
|
| 150 |
|
| 151 |
@app.get("/health")
|
| 152 |
async def health_check():
|
| 153 |
-
import os
|
| 154 |
paths = {
|
| 155 |
"chrome": "/usr/bin/google-chrome",
|
| 156 |
-
"chromedriver": "/usr/bin/chromedriver"
|
| 157 |
}
|
| 158 |
exists = {name: os.path.exists(path) for name, path in paths.items()}
|
| 159 |
|
| 160 |
-
# Try to get versions
|
| 161 |
chrome_version = "Not found"
|
| 162 |
chromedriver_version = "Not found"
|
| 163 |
|
| 164 |
if exists["chrome"]:
|
| 165 |
try:
|
| 166 |
chrome_version = os.popen("/usr/bin/google-chrome --version").read().strip()
|
| 167 |
-
except:
|
| 168 |
-
|
| 169 |
-
|
| 170 |
-
|
| 171 |
-
|
| 172 |
-
|
| 173 |
-
|
| 174 |
-
|
|
|
|
|
|
|
|
|
|
| 175 |
|
| 176 |
return {
|
| 177 |
-
"status":
|
| 178 |
"paths": exists,
|
| 179 |
"chrome_version": chrome_version,
|
| 180 |
"chromedriver_version": chromedriver_version,
|
| 181 |
-
"working":
|
| 182 |
}
|
| 183 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
| 184 |
if __name__ == "__main__":
    # Direct execution: serve the FastAPI app with uvicorn on all interfaces.
    # The PORT environment variable overrides the default port 7860.
    import uvicorn

    listen_port = int(os.environ.get("PORT", 7860))
    uvicorn.run(app, host="0.0.0.0", port=listen_port)
|
|
|
|
| 8 |
from selenium.webdriver.support.ui import WebDriverWait
|
| 9 |
from selenium.webdriver.support import expected_conditions as EC
|
| 10 |
from selenium.common.exceptions import TimeoutException, WebDriverException
|
| 11 |
+
from webdriver_manager.chrome import ChromeDriverManager
|
| 12 |
import time
|
| 13 |
import logging
|
| 14 |
import os
|
|
|
|
| 38 |
processing_time: float
|
| 39 |
|
| 40 |
def init_driver():
    """Start a headless Chrome WebDriver using a ChromeDriver from WebDriver Manager.

    Returns:
        The started ``webdriver.Chrome`` instance.

    Raises:
        Exception: If ChromeDriver cannot be installed or Chrome fails to
            start. The underlying error is chained as ``__cause__`` so the
            original traceback is preserved.
    """
    # Imported locally so this function does not rely on module-level imports
    # that are not among the ones visible near the top of the file.
    # NOTE(review): redundant (but harmless) if the file header already
    # imports these names.
    from selenium import webdriver
    from selenium.webdriver.chrome.options import Options
    from selenium.webdriver.chrome.service import Service

    options = Options()
    # Command-line switches handed to the Chrome binary.
    for flag in (
        "--headless=new",
        "--no-sandbox",
        "--disable-dev-shm-usage",
        "--disable-gpu",
    ):
        options.add_argument(flag)
    options.binary_location = "/usr/bin/google-chrome"

    try:
        # Use WebDriver Manager to automatically download ChromeDriver
        driver_path = ChromeDriverManager().install()
        return webdriver.Chrome(service=Service(driver_path), options=options)
    except Exception as e:
        # Chain the cause so callers and logs still see the original failure.
        raise Exception(f"Driver initialization failed: {e}") from e
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 54 |
|
| 55 |
@app.post("/transcript", response_model=TranscriptResponse)
|
| 56 |
async def get_transcript(request: VideoRequest):
|
|
|
|
| 138 |
|
| 139 |
def _probe_version(binary_path, timeout=10.0):
    """Return the stripped stdout of running ``<binary_path> --version``.

    The binary is executed directly from an argument list (no shell), its
    pipes are closed when the call returns, and the call is bounded by
    ``timeout`` seconds.

    Raises:
        OSError: If the binary cannot be executed. A timeout is reported as
            ``TimeoutError``, which is an ``OSError`` subclass.
    """
    import subprocess

    try:
        completed = subprocess.run(
            [binary_path, "--version"],
            capture_output=True,
            text=True,
            errors="replace",
            timeout=timeout,
            check=False,
        )
    except subprocess.TimeoutExpired as exc:
        raise TimeoutError(
            f"{binary_path} --version timed out after {timeout}s"
        ) from exc
    return completed.stdout.strip()


@app.get("/health")
async def health_check():
    """Report whether Chrome is installed and which browser/driver versions respond.

    Returns:
        A dict with the keys ``"status"`` (``"OK"`` or ``"ERROR"``),
        ``"paths"`` (existence flag per checked path), ``"chrome_version"``,
        ``"chromedriver_version"`` and ``"working"``.

    NOTE(review): the version probes are blocking calls made from an async
    handler; each is bounded by the helper's timeout.
    """
    paths = {
        "chrome": "/usr/bin/google-chrome",
        "chromedriver": "/usr/bin/chromedriver",  # May not exist with WebDriver Manager
    }
    exists = {name: os.path.exists(path) for name, path in paths.items()}

    chrome_version = "Not found"
    if exists["chrome"]:
        try:
            chrome_version = _probe_version(paths["chrome"])
        except OSError as e:
            logger.error(f"Failed to get Chrome version: {str(e)}")

    # Check ChromeDriver version (may be managed by WebDriver Manager).
    # A missing or unrunnable system chromedriver yields the same placeholder
    # as an empty version string, so the response stays unchanged for clients.
    try:
        chromedriver_version = _probe_version(paths["chromedriver"])
    except OSError:
        logger.info("ChromeDriver version check skipped (likely managed by WebDriver Manager)")
        chromedriver_version = ""
    chromedriver_version = chromedriver_version or "Managed by WebDriver Manager"

    status = "OK" if exists["chrome"] else "ERROR"
    logger.info(f"Health check: Chrome={chrome_version}, ChromeDriver={chromedriver_version}, Paths={exists}")

    return {
        "status": status,
        "paths": exists,
        "chrome_version": chrome_version,
        "chromedriver_version": chromedriver_version,
        "working": exists["chrome"],  # Chrome is critical, ChromeDriver may be managed
    }
|
| 172 |
+
|
| 173 |
+
@app.get("/")
async def root():
    """Landing route: respond with a static welcome payload."""
    greeting = {"message": "Welcome to YouTube Transcript API"}
    return greeting
|
| 176 |
+
|
| 177 |
if __name__ == "__main__":
    # Direct execution: serve the FastAPI app with uvicorn on all interfaces.
    # The PORT environment variable overrides the default port 7860.
    import uvicorn

    listen_port = int(os.environ.get("PORT", 7860))
    uvicorn.run(app, host="0.0.0.0", port=listen_port)
|