atonyxu committed on
Commit
687a55b
·
verified ·
1 Parent(s): 4af7a9b

Upload app.py

Browse files
Files changed (1) hide show
  1. app.py +128 -0
app.py ADDED
@@ -0,0 +1,128 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import gradio as gr
2
+ import requests
3
+ import re
4
+ import html
5
+
6
def _extract_article(html_content: str, base_url: str):
    """Return ``(image_urls, title)`` scraped from an article page.

    Args:
        html_content: Raw HTML of the page.
        base_url: URL the page was loaded from; relative ``src`` values are
            resolved against it.

    Returns:
        A tuple of the list of absolute image URLs (every ``<img src=...>``
        found anywhere in the page) and the text of the first ``<p>``
        element, or ``"Untitled"`` when the page has no ``<p>`` element.
    """
    from urllib.parse import urljoin

    img_regex = re.compile(r'<img[^>]+src=["\']([^"\']+)["\'][^>]*>', re.IGNORECASE)
    # Attribute values are HTML-encoded in the page source (e.g. ``&amp;``),
    # so decode them before treating them as URLs, then make them absolute.
    img_urls = [
        urljoin(base_url, html.unescape(src).strip())
        for src in img_regex.findall(html_content)
        if src.strip()
    ]

    # ``\b`` keeps the pattern from matching other tags that merely start
    # with "p" (``<pre>``, ``<path>``, ...).
    p_regex = re.compile(r'<p\b[^>]*>([^<]*(?:<(?!\/p>)[^<]*)*?)<\/p>', re.IGNORECASE)
    first_p_match = p_regex.search(html_content)
    if first_p_match is None:
        return img_urls, "Untitled"

    # The title is shown in a plain Textbox, so drop inline markup such as
    # <strong>/<a> instead of displaying the raw tags.
    title_text = re.sub(r'<[^>]+>', '', first_p_match.group(1))
    return img_urls, html.unescape(title_text).strip()


def _render_listing(html_content: str, base_url: str) -> str:
    """Build the HTML fragment shown for a listing page.

    Article cards (``<div class="card">`` containing an ``<a href>``) are
    turned into a ``<ul>`` of links, followed by the page's pagination
    ``<nav>`` block when one is present.

    Args:
        html_content: Raw HTML of the page.
        base_url: URL the page was loaded from; relative ``href`` values are
            resolved against it.

    Returns:
        The HTML string to display.
    """
    from urllib.parse import urljoin, urlparse

    article_card_regex = re.compile(
        r'<div class="card">.*?<a href="([^"]+)"[^>]*>([^<]+)</a>.*?</div>',
        re.DOTALL | re.IGNORECASE,
    )

    items = []
    for href, title in article_card_regex.findall(html_content):
        try:
            full_url = urljoin(base_url, html.unescape(href).strip())
            scheme = urlparse(full_url).scheme
        except ValueError:
            # Malformed link in the remote page: skip it rather than failing
            # the whole listing.
            continue
        # Only plain web links are rendered; this drops ``javascript:`` and
        # similar schemes coming from the remote page.
        if scheme not in ("http", "https"):
            continue
        # The title is remote text: decode its entities for normalisation,
        # then re-escape so it cannot inject markup into our own HTML.
        safe_title = html.escape(html.unescape(title).strip())
        safe_url = html.escape(full_url, quote=True)
        items.append(f'<li><a href="{safe_url}" target="_blank">{safe_title}</a></li>')

    if items:
        links_html = "<h3>页面文章链接:</h3><ul>" + "".join(items) + "</ul>"
    else:
        links_html = "<p>未在此页面找到文章链接。</p>"

    pagination_regex = re.compile(
        r'<nav class="navigation pagination">.*?</nav>',
        re.DOTALL | re.IGNORECASE,
    )
    pagination_match = pagination_regex.search(html_content)
    # NOTE(review): the pagination block is passed through verbatim from the
    # remote page and is therefore only as trustworthy as that page.
    pagination_html = pagination_match.group(0) if pagination_match else ""

    return f"{links_html} {pagination_html}"


def fetch_content(url: str):
    """Fetch ``url`` synchronously and turn the page into Gradio updates.

    A page containing ``<div class="entry-content"`` is treated as an article
    page (images + title); anything else is treated as a listing page
    (article links + pagination).

    Args:
        url: Absolute URL to download.

    Returns:
        A 3-tuple ``(gr.Gallery, gr.Textbox, gr.HTML)`` matching the output
        components wired up in the UI. Request and parsing failures are not
        raised; they are reported through the Textbox.
    """
    try:
        # Network access may be restricted on the host, so bound the wait.
        response = requests.get(url, timeout=30)
        response.raise_for_status()
        html_content = response.text
        # requests follows redirects by default; resolve relative links
        # against the URL the content was actually served from.
        base_url = response.url or url

        if '<div class="entry-content"' in html_content:
            img_urls, title = _extract_article(html_content, base_url)
            if img_urls:
                return gr.Gallery(visible=True, value=img_urls), gr.Textbox(visible=True, value=f"文章: {title}"), gr.HTML(visible=False, value="")
            return gr.Gallery(visible=True, value=[]), gr.Textbox(visible=True, value="文章: 找到文章但未提取到图片。"), gr.HTML(visible=False, value="")

        full_html_display = _render_listing(html_content, base_url)
        return gr.Gallery(visible=False, value=[]), gr.Textbox(visible=False, value=""), gr.HTML(visible=True, value=full_html_display)

    except requests.exceptions.RequestException as e:
        error_msg = f"请求错误: {str(e)}"
        return gr.Gallery(visible=False, value=[]), gr.Textbox(visible=True, value=error_msg), gr.HTML(visible=False, value="")
    except Exception as e:
        # UI boundary: surface unexpected parsing problems as a status
        # message instead of crashing the event handler.
        error_msg = f"解析错误: {str(e)}"
        return gr.Gallery(visible=False, value=[]), gr.Textbox(visible=True, value=error_msg), gr.HTML(visible=False, value="")
75
+
76
+
77
def load_url_content(url: str):
    """Validate the address typed by the user and load it via ``fetch_content``.

    A missing scheme is completed with ``https://``. The request is only made
    when the URL's host is exactly ``1069.atony.workers.dev``; the host is
    taken from the parsed URL rather than searched for as a substring, so the
    domain name appearing in a path, query string or userinfo part of some
    other host's URL is rejected.

    Args:
        url: Address entered in the UI; may lack a scheme.

    Returns:
        The 3-tuple ``(gr.Gallery, gr.Textbox, gr.HTML)`` produced by
        ``fetch_content``, or an error tuple when the host is not allowed.
    """
    from urllib.parse import urlparse

    allowed_host = '1069.atony.workers.dev'

    url = (url or "").strip()
    # Default to https:// when the user omitted the scheme.
    if not url.startswith(('http://', 'https://')):
        url = 'https://' + url

    try:
        hostname = urlparse(url).hostname
    except ValueError:
        # Malformed authority part; treat as "not the allowed host".
        hostname = None

    if hostname != allowed_host:
        return gr.Gallery(visible=False, value=[]), gr.Textbox(visible=True, value="错误: URL 必须包含 '1069.atony.workers.dev'"), gr.HTML(visible=False, value="")

    return fetch_content(url)
89
+
90
# Gradio user interface
with gr.Blocks(title="1069 Proxy Viewer (Hugging Face Spaces)") as demo:
    # Header: heading, description and warning, in display order.
    for banner_text in (
        "## 1069 内容查看器 (代理模式 - Hugging Face Spaces)",
        "此工具用于获取和显示来自 `1069.atony.workers.dev` 的内容。请注意内容性质。",
        "**警告:请勿访问原始网站 `www.mens1069.com`,请仅使用代理地址。**",
    ):
        gr.Markdown(banner_text)

    # Address bar and its submit button.
    url_input = gr.Textbox(
        label="代理地址栏",
        value="https://1069.atony.workers.dev/",
        info="输入 1069.atony.workers.dev 的完整地址,例如主页或文章页。",
    )
    submit_btn = gr.Button("访问地址")

    # Output components; the handler decides which ones are visible.
    error_output = gr.Textbox(label="状态/错误信息", interactive=False, visible=True)
    image_gallery = gr.Gallery(
        label="文章图片",
        columns=3,
        object_fit="contain",
        height="auto",
        visible=False,
    )
    html_output = gr.HTML(label="页面链接 (列表页)", visible=False)

    # Clicking the button and pressing Enter in the address bar both run the
    # same handler with the same inputs and outputs.
    for register_event in (submit_btn.click, url_input.submit):
        register_event(
            fn=load_url_content,
            inputs=url_input,
            outputs=[image_gallery, error_output, html_output],
        )

# Enable the request queue.
demo.queue()

# Start the app when run as a script.
if __name__ == "__main__":
    demo.launch(server_name="0.0.0.0", server_port=7860)