MashiroLn committed on
Commit
bfedf71
·
verified ·
1 Parent(s): 0aee62c

Upload folder using huggingface_hub

Browse files
Files changed (3) hide show
  1. app.py +17 -7
  2. apps/pdf_cropper.py +26 -13
  3. apps/text_diff.py +52 -0
app.py CHANGED
@@ -1,13 +1,9 @@
1
  import gradio as gr
2
- from apps import pdf_cropper, text_tools
3
-
4
- # --- 主程序入口 ---
5
- # 这里是“应用集市”的容器。
6
- # 每次添加新工具,只需要 import 进来,并在 tab_list 中注册即可。
7
 
8
  def create_main_interface():
9
  with gr.Blocks(title="我的科研工具箱") as main_app:
10
- gr.Markdown("# 🛠️ 科研效率工具箱")
11
 
12
  # 使用 Tab 布局来切换不同的工具
13
  with gr.Tabs():
@@ -19,6 +15,10 @@ def create_main_interface():
19
  # --- 工具 2: 文本分析 (示例) ---
20
  with gr.TabItem("📝 文本统计"):
21
  text_tools.create_ui()
 
 
 
 
22
 
23
  # --- 可以在这里继续添加更多 Tab ---
24
 
@@ -26,4 +26,14 @@ def create_main_interface():
26
 
27
  if __name__ == "__main__":
28
  app = create_main_interface()
29
- app.launch(inbrowser=True)
 
 
 
 
 
 
 
 
 
 
 
1
  import gradio as gr
2
+ from apps import pdf_cropper, text_tools, text_diff
 
 
 
 
3
 
4
  def create_main_interface():
5
  with gr.Blocks(title="我的科研工具箱") as main_app:
6
+ gr.Markdown("# 🛠️ 写作妙妙小工具")
7
 
8
  # 使用 Tab 布局来切换不同的工具
9
  with gr.Tabs():
 
15
  # --- 工具 2: 文本分析 (示例) ---
16
  with gr.TabItem("📝 文本统计"):
17
  text_tools.create_ui()
18
+
19
+ # --- 工具 3: 文本比对 ---
20
+ with gr.TabItem("🔍 文本比对"):
21
+ text_diff.create_ui()
22
 
23
  # --- 可以在这里继续添加更多 Tab ---
24
 
 
26
 
27
  if __name__ == "__main__":
28
  app = create_main_interface()
29
+
30
+ # 创建一个微调过的 Soft 主题
31
+ # primary_hue="indigo" (靛青色,更有科技感)
32
+ # neutral_hue="slate" (岩灰色,更护眼)
33
+ custom_theme = gr.themes.Soft(
34
+ primary_hue="indigo",
35
+ neutral_hue="slate",
36
+ )
37
+
38
+ # 注意:在 Gradio 新版本中,theme 参数已移动到 launch() 方法中
39
+ app.launch(inbrowser=True, theme=custom_theme)
apps/pdf_cropper.py CHANGED
@@ -3,6 +3,8 @@ from PIL import Image, ImageChops
3
  import img2pdf
4
  import io
5
 
 
 
6
  # --- 核心逻辑 (复用之前的代码) ---
7
  def trim_whitespace(im, fuzz_level=20):
8
  bg = Image.new(im.mode, im.size, (255, 255, 255))
@@ -15,10 +17,12 @@ def trim_whitespace(im, fuzz_level=20):
15
 
16
  def process_pipeline(files, quality, fuzz_level, progress=gr.Progress()):
17
  if not files: return None
18
- pdf_components = []
 
19
  for filepath in progress.tqdm(files, desc="处理中"):
20
  try:
21
  with Image.open(filepath) as img:
 
22
  if img.mode in ('RGBA', 'LA') or (img.mode == 'P' and 'transparency' in img.info):
23
  bg = Image.new('RGB', img.size, (255, 255, 255))
24
  if img.mode!= 'RGBA': img = img.convert('RGBA')
@@ -27,17 +31,26 @@ def process_pipeline(files, quality, fuzz_level, progress=gr.Progress()):
27
  else:
28
  img = img.convert('RGB')
29
 
 
30
  trimmed = trim_whitespace(img, fuzz_level)
31
- byte_arr = io.BytesIO()
32
- trimmed.save(byte_arr, format='JPEG', quality=int(quality))
33
- pdf_components.append(byte_arr.getvalue())
34
- except Exception: continue
 
 
 
 
 
 
 
 
 
 
 
 
35
 
36
- if not pdf_components: return None
37
- output_filename = "output_document.pdf"
38
- with open(output_filename, "wb") as f:
39
- f.write(img2pdf.convert(pdf_components))
40
- return output_filename
41
 
42
  # --- 模块化 UI 接口 ---
43
  def create_ui():
@@ -48,11 +61,11 @@ def create_ui():
48
  """
49
  with gr.Row():
50
  with gr.Column():
51
- file_input = gr.File(file_count="multiple", file_types=["image"], label="上传图片")
52
- quality = gr.Slider(10, 100, 90, label="质量")
53
  fuzz = gr.Slider(0, 100, 30, label="容差")
54
  btn = gr.Button("开始处理", variant="primary")
55
  with gr.Column():
56
- output = gr.File(label="下载 PDF")
57
 
58
  btn.click(process_pipeline, [file_input, quality, fuzz], output)
 
3
  import img2pdf
4
  import io
5
 
6
+ import os
7
+
8
  # --- 核心逻辑 (复用之前的代码) ---
9
  def trim_whitespace(im, fuzz_level=20):
10
  bg = Image.new(im.mode, im.size, (255, 255, 255))
 
17
 
18
  def process_pipeline(files, quality, fuzz_level, progress=gr.Progress()):
19
  if not files: return None
20
+ output_files = []
21
+
22
  for filepath in progress.tqdm(files, desc="处理中"):
23
  try:
24
  with Image.open(filepath) as img:
25
+ # 处理透明背景
26
  if img.mode in ('RGBA', 'LA') or (img.mode == 'P' and 'transparency' in img.info):
27
  bg = Image.new('RGB', img.size, (255, 255, 255))
28
  if img.mode!= 'RGBA': img = img.convert('RGBA')
 
31
  else:
32
  img = img.convert('RGB')
33
 
34
+ # 裁边
35
  trimmed = trim_whitespace(img, fuzz_level)
36
+
37
+ # 获取原始文件名并构建输出路径
38
+ original_name = os.path.basename(filepath)
39
+ name, _ = os.path.splitext(original_name)
40
+ # 保持文件名,改为 .pdf 后缀
41
+ output_filename = f"{name}_cropped.pdf"
42
+
43
+ # 保存为 PDF
44
+ # img2pdf 需要 bytes 或文件路径,这里我们先存为临时图片再转,或者直接用 PIL save pdf
45
+ # PIL save PDF 支持单张
46
+ trimmed.save(output_filename, "PDF", resolution=100.0, save_all=True)
47
+ output_files.append(output_filename)
48
+
49
+ except Exception as e:
50
+ print(f"Error processing {filepath}: {e}")
51
+ continue
52
 
53
+ return output_files
 
 
 
 
54
 
55
  # --- 模块化 UI 接口 ---
56
  def create_ui():
 
61
  """
62
  with gr.Row():
63
  with gr.Column():
64
+ file_input = gr.File(file_count="multiple", file_types=["image"], label="上传图片 (支持多选)")
65
+ quality = gr.Slider(10, 100, 90, label="质量 (仅用于压缩,当前直接转PDF可忽略)")
66
  fuzz = gr.Slider(0, 100, 30, label="容差")
67
  btn = gr.Button("开始处理", variant="primary")
68
  with gr.Column():
69
+ output = gr.File(label="下载结果", file_count="multiple")
70
 
71
  btn.click(process_pipeline, [file_input, quality, fuzz], output)
apps/text_diff.py ADDED
@@ -0,0 +1,52 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import gradio as gr
2
+ from difflib import Differ
3
+
4
def diff_texts(text1, text2):
    """Return a line-level diff of two texts as ``(text, label)`` pairs.

    Both inputs are split into lines (line endings kept) and compared with
    ``difflib.Differ``. Each returned pair holds a line with its
    two-character diff prefix removed, plus a label:

    * ``"+"``  -- the line appears only in *text2*,
    * ``"-"``  -- the line appears only in *text1*,
    * ``None`` -- the line is common to both.

    ``Differ`` additionally emits ``"? "`` guide lines that point at
    intraline changes. Those lines are not part of either input, so they are
    dropped instead of being returned as spurious ``"?"``-labelled segments.

    ``None`` inputs are treated as empty text.
    """
    old_lines = (text1 or "").splitlines(keepends=True)
    new_lines = (text2 or "").splitlines(keepends=True)
    return [
        (entry[2:], None if entry[0] == " " else entry[0])
        for entry in Differ().compare(old_lines, new_lines)
        if entry[0] != "?"  # skip Differ's intraline hint lines
    ]
15
+
16
def diff_texts_word_level(text1, text2):
    """Return a word-level diff of two texts as ``(word, label)`` pairs.

    Each text is split on whitespace and the resulting word lists are
    compared with ``difflib.Differ``. Every kept word gets a trailing space
    appended (to stand in for the separator removed by ``split``) and is
    labelled ``"+"`` (added), ``"-"`` (removed) or ``None`` (unchanged).
    Any other ``Differ`` output, such as ``"? "`` hint lines, is ignored.
    """
    # Maps Differ's leading marker to the label used in the result.
    label_for_marker = {" ": None, "-": "-", "+": "+"}

    segments = []
    for entry in Differ().compare(text1.split(), text2.split()):
        marker = entry[0]
        if marker not in label_for_marker:
            continue
        segments.append((entry[2:] + " ", label_for_marker[marker]))
    return segments
31
+
32
def create_ui():
    """Build the text-comparison tab.

    Lays out two side-by-side text boxes (original and new text), a compare
    button, and a ``gr.HighlightedText`` view. Clicking the button runs
    ``diff_texts_word_level`` on the two text boxes and shows the result in
    the highlighted view.
    """
    # Labels produced by the diff function mapped to highlight colours:
    # "+" (added) is green, "-" (removed) is red.
    highlight_colors = {"+": "green", "-": "red"}

    with gr.Row():
        with gr.Column():
            original_box = gr.Textbox(
                label="原始文本 (Original)",
                lines=15,
                placeholder="输入旧版本文本...",
            )
        with gr.Column():
            revised_box = gr.Textbox(
                label="新文本 (New)",
                lines=15,
                placeholder="输入新版本文本...",
            )

    with gr.Row():
        compare_button = gr.Button("🔍 对比差异", variant="primary")

    diff_view = gr.HighlightedText(
        label="差异视图 (Diff View)",
        combine_adjacent=True,
        show_legend=True,
        color_map=highlight_colors,
    )

    # The word-level diff is wired up here (the original author preferred
    # its visual result over the line-level variant).
    compare_button.click(
        diff_texts_word_level,
        inputs=[original_box, revised_box],
        outputs=[diff_view],
    )