Spaces:

wkplhc
/

url

Sleeping

App Files Files Community

wkplhc committed on Sep 8, 2025

Commit

7866d4f

verified ·

1 Parent(s): 110c06b

Create app.py

Browse files

Files changed (1) hide show

app.py +145 -0

app.py ADDED Viewed

	@@ -0,0 +1,145 @@

import html
import re
from urllib.parse import urljoin, urlparse

import gradio as gr
import requests
from bs4 import BeautifulSoup
def is_valid_url(url):
    """Return True if *url* is an absolute HTTP(S) URL.

    Args:
        url: Candidate URL, normally the text typed into the UI.

    Returns:
        bool: True when ``url`` is a string that parses with an ``http`` or
        ``https`` scheme and a non-empty network location. False for
        anything else, including non-string input and strings that
        ``urlparse`` rejects.
    """
    if not isinstance(url, str):
        return False
    try:
        parsed = urlparse(url)
    except ValueError:
        # urlparse raises ValueError for malformed input such as an
        # unbalanced IPv6 bracket ("http://[::1").
        return False
    # The page is fetched with requests, which only speaks http/https.
    # urlparse already lowercases the scheme, so a plain comparison is enough.
    return parsed.scheme in ("http", "https") and bool(parsed.netloc)
# Absolute URLs ending in a common video extension, plus an optional query
# string (often needed for signed/tokenised links). The extension group is
# non-capturing so that a match yields the whole URL rather than just "mp4".
# Whitespace and angle brackets are excluded so a match cannot run across tags.
_VIDEO_URL_RE = re.compile(
    r'https?://[^\s"\'<>]+?\.(?:mp4|webm|mov|avi|flv|mkv)(?!\w)'
    r'(?:\?[^\s"\'<>]*)?',
    re.IGNORECASE,
)

# (tag name, result label) pairs, in the order results are reported.
_TAG_LABELS = (
    ("video", "🎬 视频标签"),
    ("source", "📽️ 源标签"),
    ("iframe", "🔗 嵌入框架"),
)


def extract_video_urls(url):
    """Fetch a web page and list the video addresses found in it.

    The page is searched in this order: ``src`` attributes of ``<video>``,
    ``<source>`` and ``<iframe>`` tags, then any absolute URL in the raw
    page text that ends in a known video file extension. Each distinct URL
    is reported once, under the label of the first place it was found.

    Args:
        url: Address of the page to inspect; must be an http/https URL.

    Returns:
        str: The labelled URLs joined by blank lines, or a user-facing
        message describing why nothing could be returned (invalid URL,
        timeout, HTTP error, request failure, parse error, or no video
        found). The function never raises; all errors become messages.
    """
    if isinstance(url, str):
        # Pasted URLs frequently carry stray surrounding whitespace.
        url = url.strip()
    if not is_valid_url(url):
        return "❌ 无效的URL，请检查格式是否正确（需包含http://或https://）"
    try:
        # Send a browser-like User-Agent with the request.
        headers = {
            "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.124 Safari/537.36"
        }
        response = requests.get(url, headers=headers, timeout=10)
        response.raise_for_status()  # turn 4xx/5xx responses into HTTPError
        page_text = response.text
        soup = BeautifulSoup(page_text, 'html.parser')
        # Relative links must be resolved against the final URL after redirects.
        base_url = response.url or url

        entries = []
        seen = set()

        def add(label, candidate):
            # Keep only the first sighting of each URL, preserving order.
            if candidate not in seen:
                seen.add(candidate)
                entries.append(f"{label}: {candidate}")

        # 1-3. src attributes of <video>, <source> and <iframe> tags.
        for tag_name, label in _TAG_LABELS:
            for tag in soup.find_all(tag_name):
                src = (tag.get('src') or '').strip()
                if src:  # an empty src would resolve to the page URL itself
                    add(label, urljoin(base_url, src))

        # 4. Direct video file URLs anywhere in the raw page text.
        for match in _VIDEO_URL_RE.finditer(page_text):
            # Unescape entities (&amp; etc.) so the URL is usable and matches
            # the already-unescaped attribute values collected above.
            add("🔍 检测到视频", html.unescape(match.group(0)))

        if not entries:
            return "ℹ️ 未在该网页中找到视频地址"
        return "\n\n".join(entries)
    except requests.exceptions.Timeout:
        return "⏱️ 请求超时，请稍后再试"
    except requests.exceptions.HTTPError as e:
        return f"❌ HTTP错误: {str(e)}"
    except requests.exceptions.RequestException as e:
        return f"❌ 请求失败: {str(e)}"
    except Exception as e:
        # UI boundary: report unexpected parsing failures instead of crashing.
        return f"❌ 解析错误: {str(e)}"
# Static markdown shown above and below the controls.
_INTRO_MARKDOWN = """
    # 🎥 视频地址提取工具
    输入包含视频的网页URL，提取该页面中所有视频的真实地址。
    使用说明:
    1. 输入完整的网页URL（需包含http://或https://）
    2. 点击"提取视频地址"按钮
    3. 等待解析完成后查看结果
    """
_NOTICE_MARKDOWN = """
    ⚠️ 注意:
    - 部分网站可能因防盗链或权限限制无法提取视频
    - 提取结果仅供学习研究使用
    - 大型网页可能需要较长解析时间
    """

# Build the Gradio interface: header, URL box + button row, result box, notes.
with gr.Blocks(title="视频地址提取工具", theme=gr.themes.Soft()) as demo:
    gr.Markdown(_INTRO_MARKDOWN)

    with gr.Row():
        url_input = gr.Textbox(
            label="网页URL",
            placeholder="例如: https://example.com/video-page",
            lines=1,
            container=True,
        )
        extract_btn = gr.Button("提取视频地址", variant="primary")

    result_output = gr.Textbox(label="提取结果", lines=10, container=True)

    # Clicking the button and pressing Enter in the URL box run the same handler.
    _extract_event = dict(
        fn=extract_video_urls,
        inputs=url_input,
        outputs=result_output,
    )
    extract_btn.click(**_extract_event)
    url_input.submit(**_extract_event)

    gr.Markdown(_NOTICE_MARKDOWN)

# Launch the app only when run as a script.
if __name__ == "__main__":
    demo.launch()