Spaces:
Running
Running
Upload folder using huggingface_hub
Browse files
- app.py +17 -7
- apps/pdf_cropper.py +26 -13
- apps/text_diff.py +52 -0
app.py
CHANGED
|
@@ -1,13 +1,9 @@
|
|
| 1 |
import gradio as gr
|
| 2 |
-
from apps import pdf_cropper, text_tools
|
| 3 |
-
|
| 4 |
-
# --- 主程序入口 ---
|
| 5 |
-
# 这里是“应用集市”的容器。
|
| 6 |
-
# 每次添加新工具,只需要 import 进来,并在 tab_list 中注册即可。
|
| 7 |
|
| 8 |
def create_main_interface():
|
| 9 |
with gr.Blocks(title="我的科研工具箱") as main_app:
|
| 10 |
-
gr.Markdown("# 🛠️
|
| 11 |
|
| 12 |
# 使用 Tab 布局来切换不同的工具
|
| 13 |
with gr.Tabs():
|
|
@@ -19,6 +15,10 @@ def create_main_interface():
|
|
| 19 |
# --- 工具 2: 文本分析 (示例) ---
|
| 20 |
with gr.TabItem("📝 文本统计"):
|
| 21 |
text_tools.create_ui()
|
|
|
|
|
|
|
|
|
|
|
|
|
| 22 |
|
| 23 |
# --- 可以在这里继续添加更多 Tab ---
|
| 24 |
|
|
@@ -26,4 +26,14 @@ def create_main_interface():
|
|
| 26 |
|
| 27 |
if __name__ == "__main__":
|
| 28 |
app = create_main_interface()
|
| 29 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
import gradio as gr
|
| 2 |
+
from apps import pdf_cropper, text_tools, text_diff
|
|
|
|
|
|
|
|
|
|
|
|
|
| 3 |
|
| 4 |
def create_main_interface():
|
| 5 |
with gr.Blocks(title="我的科研工具箱") as main_app:
|
| 6 |
+
gr.Markdown("# 🛠️ 写作妙妙小工具")
|
| 7 |
|
| 8 |
# 使用 Tab 布局来切换不同的工具
|
| 9 |
with gr.Tabs():
|
|
|
|
| 15 |
# --- 工具 2: 文本分析 (示例) ---
|
| 16 |
with gr.TabItem("📝 文本统计"):
|
| 17 |
text_tools.create_ui()
|
| 18 |
+
|
| 19 |
+
# --- 工具 3: 文本比对 ---
|
| 20 |
+
with gr.TabItem("🔍 文本比对"):
|
| 21 |
+
text_diff.create_ui()
|
| 22 |
|
| 23 |
# --- 可以在这里继续添加更多 Tab ---
|
| 24 |
|
|
|
|
| 26 |
|
| 27 |
if __name__ == "__main__":
    # Script entry point: build the tabbed interface, then serve it.
    app = create_main_interface()

    # Lightly customised Soft theme: an indigo primary hue and a slate
    # neutral hue. The theme is handed to launch() here rather than to
    # the gr.Blocks() constructor.
    app.launch(
        inbrowser=True,
        theme=gr.themes.Soft(
            primary_hue="indigo",
            neutral_hue="slate",
        ),
    )
|
apps/pdf_cropper.py
CHANGED
|
@@ -3,6 +3,8 @@ from PIL import Image, ImageChops
|
|
| 3 |
import img2pdf
|
| 4 |
import io
|
| 5 |
|
|
|
|
|
|
|
| 6 |
# --- 核心逻辑 (复用之前的代码) ---
|
| 7 |
def trim_whitespace(im, fuzz_level=20):
|
| 8 |
bg = Image.new(im.mode, im.size, (255, 255, 255))
|
|
@@ -15,10 +17,12 @@ def trim_whitespace(im, fuzz_level=20):
|
|
| 15 |
|
| 16 |
def process_pipeline(files, quality, fuzz_level, progress=gr.Progress()):
|
| 17 |
if not files: return None
|
| 18 |
-
|
|
|
|
| 19 |
for filepath in progress.tqdm(files, desc="处理中"):
|
| 20 |
try:
|
| 21 |
with Image.open(filepath) as img:
|
|
|
|
| 22 |
if img.mode in ('RGBA', 'LA') or (img.mode == 'P' and 'transparency' in img.info):
|
| 23 |
bg = Image.new('RGB', img.size, (255, 255, 255))
|
| 24 |
if img.mode!= 'RGBA': img = img.convert('RGBA')
|
|
@@ -27,17 +31,26 @@ def process_pipeline(files, quality, fuzz_level, progress=gr.Progress()):
|
|
| 27 |
else:
|
| 28 |
img = img.convert('RGB')
|
| 29 |
|
|
|
|
| 30 |
trimmed = trim_whitespace(img, fuzz_level)
|
| 31 |
-
|
| 32 |
-
|
| 33 |
-
|
| 34 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 35 |
|
| 36 |
-
|
| 37 |
-
output_filename = "output_document.pdf"
|
| 38 |
-
with open(output_filename, "wb") as f:
|
| 39 |
-
f.write(img2pdf.convert(pdf_components))
|
| 40 |
-
return output_filename
|
| 41 |
|
| 42 |
# --- 模块化 UI 接口 ---
|
| 43 |
def create_ui():
|
|
@@ -48,11 +61,11 @@ def create_ui():
|
|
| 48 |
"""
|
| 49 |
with gr.Row():
|
| 50 |
with gr.Column():
|
| 51 |
-
file_input = gr.File(file_count="multiple", file_types=["image"], label="上传图片")
|
| 52 |
-
quality = gr.Slider(10, 100, 90, label="质量")
|
| 53 |
fuzz = gr.Slider(0, 100, 30, label="容差")
|
| 54 |
btn = gr.Button("开始处理", variant="primary")
|
| 55 |
with gr.Column():
|
| 56 |
-
output = gr.File(label="
|
| 57 |
|
| 58 |
btn.click(process_pipeline, [file_input, quality, fuzz], output)
|
|
|
|
| 3 |
import img2pdf
|
| 4 |
import io
|
| 5 |
|
| 6 |
+
import os
|
| 7 |
+
|
| 8 |
# --- 核心逻辑 (复用之前的代码) ---
|
| 9 |
def trim_whitespace(im, fuzz_level=20):
|
| 10 |
bg = Image.new(im.mode, im.size, (255, 255, 255))
|
|
|
|
| 17 |
|
| 18 |
def process_pipeline(files, quality, fuzz_level, progress=gr.Progress()):
|
| 19 |
if not files: return None
|
| 20 |
+
output_files = []
|
| 21 |
+
|
| 22 |
for filepath in progress.tqdm(files, desc="处理中"):
|
| 23 |
try:
|
| 24 |
with Image.open(filepath) as img:
|
| 25 |
+
# 处理透明背景
|
| 26 |
if img.mode in ('RGBA', 'LA') or (img.mode == 'P' and 'transparency' in img.info):
|
| 27 |
bg = Image.new('RGB', img.size, (255, 255, 255))
|
| 28 |
if img.mode!= 'RGBA': img = img.convert('RGBA')
|
|
|
|
| 31 |
else:
|
| 32 |
img = img.convert('RGB')
|
| 33 |
|
| 34 |
+
# 裁边
|
| 35 |
trimmed = trim_whitespace(img, fuzz_level)
|
| 36 |
+
|
| 37 |
+
# 获取原始文件名并构建输出路径
|
| 38 |
+
original_name = os.path.basename(filepath)
|
| 39 |
+
name, _ = os.path.splitext(original_name)
|
| 40 |
+
# 保持文件名,改为 .pdf 后缀
|
| 41 |
+
output_filename = f"{name}_cropped.pdf"
|
| 42 |
+
|
| 43 |
+
# 保存为 PDF
|
| 44 |
+
# img2pdf 需要 bytes 或文件路径,这里我们先存为临时图片再转,或者直接用 PIL save pdf
|
| 45 |
+
# PIL save PDF 支持单张
|
| 46 |
+
trimmed.save(output_filename, "PDF", resolution=100.0, save_all=True)
|
| 47 |
+
output_files.append(output_filename)
|
| 48 |
+
|
| 49 |
+
except Exception as e:
|
| 50 |
+
print(f"Error processing {filepath}: {e}")
|
| 51 |
+
continue
|
| 52 |
|
| 53 |
+
return output_files
|
|
|
|
|
|
|
|
|
|
|
|
|
| 54 |
|
| 55 |
# --- 模块化 UI 接口 ---
|
| 56 |
def create_ui():
|
|
|
|
| 61 |
"""
|
| 62 |
with gr.Row():
|
| 63 |
with gr.Column():
|
| 64 |
+
file_input = gr.File(file_count="multiple", file_types=["image"], label="上传图片 (支持多选)")
|
| 65 |
+
quality = gr.Slider(10, 100, 90, label="质量 (仅用于压缩,当前直接转PDF可忽略)")
|
| 66 |
fuzz = gr.Slider(0, 100, 30, label="容差")
|
| 67 |
btn = gr.Button("开始处理", variant="primary")
|
| 68 |
with gr.Column():
|
| 69 |
+
output = gr.File(label="下载结果", file_count="multiple")
|
| 70 |
|
| 71 |
btn.click(process_pipeline, [file_input, quality, fuzz], output)
|
apps/text_diff.py
ADDED
|
@@ -0,0 +1,52 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import gradio as gr
|
| 2 |
+
from difflib import Differ
|
| 3 |
+
|
| 4 |
+
def diff_texts(text1, text2):
    """Return a line-level diff of two texts as ``(text, label)`` pairs.

    Both inputs are split with ``splitlines(keepends=True)`` so that every
    line keeps its own line ending, and the resulting line lists are compared
    with ``difflib.Differ``.

    Args:
        text1: The original text. ``None`` is treated as an empty string.
        text2: The revised text. ``None`` is treated as an empty string.

    Returns:
        A list of ``(line, label)`` tuples in diff order. ``label`` is ``"-"``
        for a line found only in ``text1``, ``"+"`` for a line found only in
        ``text2`` and ``None`` for a line common to both.
    """
    old_lines = (text1 or "").splitlines(keepends=True)
    new_lines = (text2 or "").splitlines(keepends=True)
    return [
        (token[2:], token[0] if token[0] != " " else None)
        for token in Differ().compare(old_lines, new_lines)
        # Differ interleaves "? " guide lines after similar lines to point at
        # the characters that changed. They belong to neither text, so they
        # must not be emitted as diff segments.
        if not token.startswith("?")
    ]
|
| 15 |
+
|
| 16 |
+
def diff_texts_word_level(text1, text2):
    """Return a word-level diff of two texts as ``(text, label)`` pairs.

    Each input is split on whitespace and the two word lists are compared
    with ``difflib.Differ``. Every emitted word gets one trailing space, since
    the original separators are discarded by ``split()``.

    Args:
        text1: The original text.
        text2: The revised text.

    Returns:
        A list of ``(word + " ", label)`` tuples in diff order. ``label`` is
        ``"-"`` for a removed word, ``"+"`` for an added word and ``None`` for
        an unchanged word.
    """
    # Differ prefix -> output label. Any other prefix (such as the "?"
    # guide lines Differ emits for similar words) is left out of the result.
    label_for_prefix = {" ": None, "-": "-", "+": "+"}

    compared = Differ().compare(text1.split(), text2.split())
    return [
        (entry[2:] + " ", label_for_prefix[entry[0]])
        for entry in compared
        if entry[0] in label_for_prefix
    ]
|
| 31 |
+
|
| 32 |
+
def create_ui():
    """Build the text-comparison UI: two text inputs, a button and a diff view.

    Clicking the button runs ``diff_texts_word_level`` on the two text boxes
    and sends the result to the highlighted-text output.
    """
    # Two columns, one text box per version being compared.
    with gr.Row():
        with gr.Column():
            original_box = gr.Textbox(
                label="原始文本 (Original)",
                lines=15,
                placeholder="输入旧版本文本...",
            )
        with gr.Column():
            revised_box = gr.Textbox(
                label="新文本 (New)",
                lines=15,
                placeholder="输入新版本文本...",
            )

    with gr.Row():
        compare_button = gr.Button("🔍 对比差异", variant="primary")

    # Colour per diff label: "+" (added) is green, "-" (removed) is red.
    label_colors = {"+": "green", "-": "red"}
    diff_view = gr.HighlightedText(
        label="差异视图 (Diff View)",
        combine_adjacent=True,
        show_legend=True,
        color_map=label_colors,
    )

    # The word-level diff is the one wired to the button.
    compare_button.click(
        diff_texts_word_level,
        inputs=[original_box, revised_box],
        outputs=[diff_view],
    )
|