Spaces:
Paused
Paused
Upload app.py
Browse files
app.py
CHANGED
|
@@ -47,6 +47,32 @@ class VideoResponse(BaseModel):
|
|
| 47 |
total_pages: Optional[int] = None
|
| 48 |
video_duration: Optional[float] = None # 秒
|
| 49 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 50 |
# ==============================
|
| 51 |
# コア機能実装
|
| 52 |
# ==============================
|
|
@@ -65,6 +91,9 @@ def download_pdf_from_url(pdf_url: str) -> str:
|
|
| 65 |
Exception: ダウンロード失敗時
|
| 66 |
"""
|
| 67 |
try:
|
|
|
|
|
|
|
|
|
|
| 68 |
logger.info(f"PDFダウンロード開始: {pdf_url}")
|
| 69 |
|
| 70 |
# HTTPリクエスト
|
|
@@ -363,6 +392,9 @@ def process_pdf_url(pdf_url, duration_per_page, dpi, progress=gr.Progress()):
|
|
| 363 |
if not pdf_url:
|
| 364 |
return None, "❌ PDF URLを入力してください", None
|
| 365 |
|
|
|
|
|
|
|
|
|
|
| 366 |
progress(0, desc="PDFダウンロード中...")
|
| 367 |
|
| 368 |
# PDFダウンロード
|
|
|
|
| 47 |
total_pages: Optional[int] = None
|
| 48 |
video_duration: Optional[float] = None # 秒
|
| 49 |
|
| 50 |
+
# ==============================
|
| 51 |
+
# URL前処理ユーティリティ
|
| 52 |
+
# ==============================
|
| 53 |
+
|
| 54 |
+
def sanitize_url(url: str) -> str:
    """Return *url* with backslashes, outer whitespace and wrapping quotes removed.

    Args:
        url: The raw URL string.

    Returns:
        str: The cleaned URL.
    """
    # Remove every backslash. The literal must be written as an escaped
    # backslash ("\\"); a lone backslash would escape the closing quote.
    cleaned_url = url.replace("\\", "")

    # Trim leading/trailing whitespace.
    cleaned_url = cleaned_url.strip()

    # Remove wrapping double/single quotes (e.g. when the value came from a
    # JSON string). Both quote characters are stripped in one pass so mixed
    # nesting such as '"..."' is handled, then any whitespace that was inside
    # the quotes is trimmed as well.
    cleaned_url = cleaned_url.strip("\"'").strip()

    logger.info(f"URL sanitized: {url} → {cleaned_url}")
    return cleaned_url
|
| 75 |
+
|
| 76 |
# ==============================
|
| 77 |
# コア機能実装
|
| 78 |
# ==============================
|
|
|
|
| 91 |
Exception: ダウンロード失敗時
|
| 92 |
"""
|
| 93 |
try:
|
| 94 |
+
# URLをサニタイズ(バックスラッシュ等を除去)
|
| 95 |
+
pdf_url = sanitize_url(pdf_url)
|
| 96 |
+
|
| 97 |
logger.info(f"PDFダウンロード開始: {pdf_url}")
|
| 98 |
|
| 99 |
# HTTPリクエスト
|
|
|
|
| 392 |
if not pdf_url:
|
| 393 |
return None, "❌ PDF URLを入力してください", None
|
| 394 |
|
| 395 |
+
# URLをサニタイズ
|
| 396 |
+
pdf_url = sanitize_url(pdf_url)
|
| 397 |
+
|
| 398 |
progress(0, desc="PDFダウンロード中...")
|
| 399 |
|
| 400 |
# PDFダウンロード
|