wkplhc committed on
Commit
7866d4f
·
verified ·
1 Parent(s): 110c06b

Create app.py

Browse files
Files changed (1) hide show
  1. app.py +145 -0
app.py ADDED
@@ -0,0 +1,145 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import gradio as gr
2
+ import requests
3
+ from bs4 import BeautifulSoup
4
+ import re
5
+ from urllib.parse import urljoin, urlparse
6
+
7
def is_valid_url(url):
    """Return whether *url* has both a scheme and a network location.

    Args:
        url: Candidate URL, e.g. ``"https://example.com/page"``.

    Returns:
        bool: True when ``urlparse`` yields a non-empty scheme and netloc;
        False otherwise, including when the value cannot be parsed at all.
    """
    try:
        parsed = urlparse(url)
    except (ValueError, TypeError, AttributeError):
        # ValueError: malformed netloc such as unbalanced IPv6 brackets.
        # TypeError / AttributeError: input that is not str or bytes.
        # Anything else (e.g. KeyboardInterrupt) is deliberately not caught.
        return False
    return bool(parsed.scheme and parsed.netloc)
14
+
15
# Absolute http(s) URLs ending in a known video extension. The extension
# group is non-capturing so a match is the whole URL (with a capturing group
# ``re.findall`` would return only the extension). Whitespace and angle
# brackets are excluded so a match cannot run across unrelated markup.
_VIDEO_URL_RE = re.compile(
    r'https?://[^\s"\'<>]+\.(?:mp4|webm|mov|avi|flv|mkv)'
)


def extract_video_urls(url):
    """Fetch a web page and list the video addresses found in it.

    Candidates are collected, in this order, from the ``src`` attribute of
    ``<video>``, ``<source>`` and ``<iframe>`` tags, and then from any
    absolute URL in the raw HTML that ends in a known video extension.
    Each address is reported once, under the first label that found it.

    Args:
        url: Address of the page to scan; must include a scheme and host.

    Returns:
        str: One labelled entry per address, separated by blank lines, or a
        human-readable status message when the URL is invalid, the request
        fails, or nothing is found. Errors are returned as text rather than
        raised because the result is shown directly in the UI.
    """
    if not is_valid_url(url):
        return "❌ 无效的URL,请检查格式是否正确(需包含http://或https://)"

    # Browser-like User-Agent; some sites reject the default client string.
    headers = {
        "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.124 Safari/537.36"
    }

    try:
        response = requests.get(url, headers=headers, timeout=10)
        response.raise_for_status()  # Turn 4xx/5xx responses into HTTPError.

        html = response.text
        soup = BeautifulSoup(html, 'html.parser')

        # Relative links must be resolved against the URL the document was
        # actually served from, which differs from the input after redirects.
        base_url = response.url or url

        entries = []
        seen = set()

        def add(label, found_url):
            """Record *found_url* under *label* unless already reported."""
            if found_url not in seen:
                seen.add(found_url)
                entries.append(f"{label}: {found_url}")

        # 1-3. Tags that carry a media or embed address in ``src``.
        tag_labels = (
            ("video", "🎬 视频标签"),
            ("source", "📽️ 源标签"),
            ("iframe", "🔗 嵌入框架"),
        )
        for tag_name, label in tag_labels:
            for tag in soup.find_all(tag_name):
                src = tag.get("src")
                # An empty src would resolve to the page itself; skip it.
                if src:
                    add(label, urljoin(base_url, src))

        # 4. Direct video links anywhere in the markup (scripts, JSON, ...).
        for match in _VIDEO_URL_RE.finditer(html):
            add("🔍 检测到视频", match.group(0))

        if not entries:
            return "ℹ️ 未在该网页中找到视频地址"
        return "\n\n".join(entries)

    except requests.exceptions.Timeout:
        return "⏱️ 请求超时,请稍后再试"
    except requests.exceptions.HTTPError as e:
        return f"❌ HTTP错误: {e}"
    except requests.exceptions.RequestException as e:
        return f"❌ 请求失败: {e}"
    except Exception as e:
        # UI boundary: report unexpected parsing failures instead of crashing.
        return f"❌ 解析错误: {e}"
92
+
93
# Build the Gradio interface.
with gr.Blocks(title="视频地址提取工具", theme=gr.themes.Soft()) as demo:
    # Page header and usage instructions.
    gr.Markdown("""
    # 🎥 视频地址提取工具

    输入包含视频的网页URL,提取该页面中所有视频的真实地址。

    使用说明:
    1. 输入完整的网页URL(需包含http://或https://)
    2. 点击"提取视频地址"按钮
    3. 等待解析完成后查看结果
    """)

    # Input row, action button, and result area, in display order.
    with gr.Row():
        url_input = gr.Textbox(
            label="网页URL",
            placeholder="例如: https://example.com/video-page",
            lines=1,
            container=True,
        )
    extract_btn = gr.Button("提取视频地址", variant="primary")
    result_output = gr.Textbox(label="提取结果", lines=10, container=True)

    # Clicking the button and pressing Enter in the URL box both run the
    # same extraction handler with the same input and output components.
    for _bind in (extract_btn.click, url_input.submit):
        _bind(fn=extract_video_urls, inputs=url_input, outputs=result_output)

    # Caveats shown below the results.
    gr.Markdown("""
    ⚠️ 注意:
    - 部分网站可能因防盗链或权限限制无法提取视频
    - 提取结果仅供学习研究使用
    - 大型网页可能需要较长解析时间
    """)

# Launch the app only when executed as a script.
if __name__ == "__main__":
    demo.launch()