Spaces:

atonyxu
/

qwe

Sleeping

App Files Files Community

atonyxu committed on Oct 27, 2025

Commit

687a55b

verified ·

1 Parent(s): 4af7a9b

Upload app.py

Browse files

Files changed (1) hide show

app.py +128 -0

app.py ADDED Viewed

	@@ -0,0 +1,128 @@

+import gradio as gr
+import requests
+import re
+import html
# Patterns are compiled once at import time instead of on every request.
_IMG_SRC_RE = re.compile(r'<img[^>]+src=["\']([^"\']+)["\'][^>]*>', re.IGNORECASE)
# ``\b`` after ``p`` stops the pattern from matching other tags that merely
# start with the letter "p" (``<pre>``, ``<path>``, ``<picture>``, ...).
_FIRST_P_RE = re.compile(r'<p\b[^>]*>([^<]*(?:<(?!\/p>)[^<]*)*?)<\/p>', re.IGNORECASE)
_CARD_LINK_RE = re.compile(
    r'<div class="card">.*?<a href="([^"]+)"[^>]*>([^<]+)</a>.*?</div>',
    re.DOTALL | re.IGNORECASE,
)
_PAGINATION_RE = re.compile(
    r'<nav class="navigation pagination">.*?</nav>',
    re.DOTALL | re.IGNORECASE,
)


def _status_only(message):
    """Return the output triple that shows only a status/error message."""
    return (
        gr.Gallery(visible=False, value=[]),
        gr.Textbox(visible=True, value=message),
        gr.HTML(visible=False, value=""),
    )


def _render_article(html_content):
    """Return the output triple for an article page: image gallery plus title."""
    # The pattern requires a non-empty ``src`` value, so no empty URLs can
    # appear in the result and no extra filtering is needed.
    img_urls = _IMG_SRC_RE.findall(html_content)

    # The title is taken from the first <p> element found in the page.
    first_p = _FIRST_P_RE.search(html_content)
    title = html.unescape(first_p.group(1)).strip() if first_p else "Untitled"

    if img_urls:
        message = f"文章: {title}"
    else:
        message = "文章: 找到文章但未提取到图片。"
    return (
        gr.Gallery(visible=True, value=img_urls),
        gr.Textbox(visible=True, value=message),
        gr.HTML(visible=False, value=""),
    )


def _link_item(href, title):
    """Build one ``<li>`` entry, escaping both values for safe HTML embedding."""
    # Both values are captured from remote markup. Decode entities first, then
    # re-escape, so that decoded text such as ``<script>`` cannot become live
    # markup in the generated HTML and ``&amp;`` is not double-encoded.
    safe_href = html.escape(html.unescape(href), quote=True)
    safe_title = html.escape(html.unescape(title).strip())
    return f'<li><a href="{safe_href}" target="_blank">{safe_title}</a></li>'


def _render_listing(html_content):
    """Return the output triple for a listing page: article links plus pagination."""
    matches = _CARD_LINK_RE.findall(html_content)
    if matches:
        # href values are used as-is; they are assumed to already be full proxy URLs.
        items = "".join(_link_item(href, title) for href, title in matches)
        links_html = f"<h3>页面文章链接:</h3><ul>{items}</ul>"
    else:
        links_html = "<p>未在此页面找到文章链接。</p>"

    # The pagination <nav> element is passed through verbatim from the fetched page.
    pagination = _PAGINATION_RE.search(html_content)
    pagination_html = pagination.group(0) if pagination else ""

    return (
        gr.Gallery(visible=False, value=[]),
        gr.Textbox(visible=False, value=""),
        gr.HTML(visible=True, value=f"{links_html} {pagination_html}"),
    )


def fetch_content(url: str):
    """Fetch *url* synchronously and turn the page into Gradio output updates.

    A page containing ``<div class="entry-content"`` is treated as an article
    page: every ``<img src=...>`` URL is collected for the gallery and the
    first ``<p>`` element supplies the title. Any other page is treated as a
    listing page: card links and the pagination ``<nav>`` are rendered as HTML.

    Args:
        url: Address to request.

    Returns:
        A 3-tuple ``(gr.Gallery, gr.Textbox, gr.HTML)``. Request and parsing
        failures are not raised; they are reported through the Textbox with
        the other two components hidden.
    """
    try:
        # Hugging Face Spaces may restrict networking, so use a bounded timeout.
        response = requests.get(url, timeout=30)
        response.raise_for_status()
        html_content = response.text

        if '<div class="entry-content"' in html_content:
            return _render_article(html_content)
        return _render_listing(html_content)
    except requests.exceptions.RequestException as e:
        return _status_only(f"请求错误: {e}")
    except Exception as e:
        # UI boundary: surface any unexpected failure as a status message
        # rather than letting the event handler crash.
        return _status_only(f"解析错误: {e}")
def load_url_content(url: str):
    """Validate *url* and, if it targets the proxy host, load it via ``fetch_content``.

    A missing scheme is filled in with ``https://``. The URL is then parsed and
    its hostname must equal ``1069.atony.workers.dev`` exactly. A plain
    substring test would also accept URLs such as
    ``https://evil.example/?x=1069.atony.workers.dev`` or
    ``https://1069.atony.workers.dev@evil.example/`` and make the server fetch
    an arbitrary host.

    Args:
        url: Address typed into the address bar; surrounding whitespace is
            ignored and ``None`` is treated as an empty string.

    Returns:
        The ``(gr.Gallery, gr.Textbox, gr.HTML)`` triple from ``fetch_content``,
        or a triple showing only an error message when the host is not allowed.
    """
    # Function-scope import so the block does not depend on a file-level import change.
    from urllib.parse import urlparse

    url = (url or "").strip()

    # Default to https:// when no scheme was given.
    if not url.startswith(('http://', 'https://')):
        url = 'https://' + url

    # Compare the parsed hostname rather than searching the whole URL string.
    try:
        hostname = urlparse(url).hostname  # already lower-cased by urlparse
    except ValueError:
        # Raised for malformed authorities (e.g. a broken IPv6 literal).
        hostname = None

    if hostname != '1069.atony.workers.dev':
        return (
            gr.Gallery(visible=False, value=[]),
            gr.Textbox(visible=True, value="错误: URL 必须包含 '1069.atony.workers.dev'"),
            gr.HTML(visible=False, value=""),
        )
    return fetch_content(url)
# Gradio interface: an address bar, a submit button and three output areas
# (status text, image gallery, HTML link list).
with gr.Blocks(title="1069 Proxy Viewer (Hugging Face Spaces)") as demo:
    # Header, description and warning banners, rendered in this order.
    for banner_text in (
        "## 1069 内容查看器 (代理模式 - Hugging Face Spaces)",
        "此工具用于获取和显示来自 `1069.atony.workers.dev` 的内容。请注意内容性质。",
        "**警告：请勿访问原始网站 `www.mens1069.com`，请仅使用代理地址。**",
    ):
        gr.Markdown(banner_text)

    # Address bar at the top, pre-filled with the proxy home page.
    url_input = gr.Textbox(
        value="https://1069.atony.workers.dev/",
        label="代理地址栏",
        info="输入 1069.atony.workers.dev 的完整地址，例如主页或文章页。",
    )
    submit_btn = gr.Button("访问地址")

    # Output components; the gallery and HTML panel start hidden and are
    # toggled by the values returned from load_url_content.
    error_output = gr.Textbox(label="状态/错误信息", interactive=False, visible=True)
    image_gallery = gr.Gallery(
        label="文章图片",
        columns=3,
        object_fit="contain",
        height="auto",
        visible=False,
    )
    html_output = gr.HTML(label="页面链接 (列表页)", visible=False)

    # The button click and pressing Enter in the address bar both run the
    # same handler with the same inputs and outputs.
    result_components = [image_gallery, error_output, html_output]
    for register_event in (submit_btn.click, url_input.submit):
        register_event(
            fn=load_url_content,
            inputs=url_input,
            outputs=result_components,
        )

# Enable the request queue.
demo.queue()

# Start the app when executed as a script.
if __name__ == "__main__":
    demo.launch(server_name="0.0.0.0", server_port=7860)