Spaces:
Sleeping
Sleeping
| from fastapi import FastAPI, File, UploadFile | |
| from fastapi.responses import HTMLResponse | |
| import pdfplumber | |
| import re | |
| from fastapi.templating import Jinja2Templates | |
| from fastapi import Request | |
# Jinja2 environment that loads HTML templates from the "templates" directory.
templates = Jinja2Templates(directory="templates")

# The ASGI application object.
app = FastAPI()
# Matches a single ASCII letter (A-Z or a-z).
english_char_pattern = re.compile(r'[A-Za-z]')

# Matches a single character in the range U+4E00..U+9FFF, which is what this
# module counts as a "Chinese character".
chinese_char_pattern = re.compile(r'[\u4e00-\u9fff]')
# Count the Chinese and English characters in a PDF, per page and in total.
def count_chinese_and_english_characters_in_pdf(file_path):
    """Tally Chinese and English characters in the extractable text of a PDF.

    Each page's text is obtained with ``page.extract_text()`` and matched
    against the module-level ``chinese_char_pattern`` and
    ``english_char_pattern``.

    Args:
        file_path: Location of the PDF, passed straight to
            ``pdfplumber.open``.

    Returns:
        A dict with four keys:

        * ``"page_counts"``: list of dicts, one per page that yielded text,
          each holding ``"page_num"`` (1-based), ``"chinese_count"`` and
          ``"english_count"``.
        * ``"total_chinese_chars"``: sum of all ``"chinese_count"`` values.
        * ``"total_english_chars"``: sum of all ``"english_count"`` values.
        * ``"total_chars"``: the two totals added together.
    """
    per_page = []

    with pdfplumber.open(file_path) as document:
        for number, page in enumerate(document.pages, start=1):
            content = page.extract_text()
            if not content:
                # Pages with no extractable text get no entry at all, so
                # "page_counts" may have gaps in its page numbers.
                continue

            per_page.append({
                "page_num": number,
                "chinese_count": len(chinese_char_pattern.findall(content)),
                "english_count": len(english_char_pattern.findall(content)),
            })

    # Totals are derived from the per-page entries rather than accumulated
    # inside the loop; the result is the same.
    chinese_total = sum(entry["chinese_count"] for entry in per_page)
    english_total = sum(entry["english_count"] for entry in per_page)

    return {
        "page_counts": per_page,
        "total_chinese_chars": chinese_total,
        "total_english_chars": english_total,
        "total_chars": chinese_total + english_total,
    }
# Home page: render the upload form.
# NOTE(review): no route decorator is visible in this view of the file;
# confirm how this handler is registered with `app`.
async def show_form(request: Request):
    """Render ``upload_form.html`` with only the request in its context."""
    context = {"request": request}
    return templates.TemplateResponse("upload_form.html", context)
# Upload endpoint: accept a PDF, count its characters and render the result.
# NOTE(review): no route decorator is visible in this view of the file;
# confirm how this handler is registered with `app`.
async def upload_file(request: Request, file: UploadFile = File(...)):
    """Count characters in an uploaded PDF and render them in the form page.

    The upload is written to a private temporary file, analysed with
    ``count_chinese_and_english_characters_in_pdf`` and then deleted. The
    client-supplied filename is only passed to the template for display; it
    is never used as a filesystem path.

    Args:
        request: The incoming request, required by the template response.
        file: The uploaded file; only ``application/pdf`` is accepted.

    Returns:
        A ``TemplateResponse`` for ``upload_form.html`` carrying the file
        name, per-page counts and totals, or an ``{"error": ...}`` dict when
        the upload's content type is not ``application/pdf``.
    """
    import os
    import tempfile

    if file.content_type != "application/pdf":
        return {"error": "只接受 PDF 文件"}

    # Display only. This value comes from the client and may contain path
    # separators or "..", so it must not be used to build a path.
    file_name = file.filename

    # delete=False so the file can be closed and then reopened by path by the
    # PDF reader; removal is done explicitly in the `finally` below.
    tmp = tempfile.NamedTemporaryFile(suffix=".pdf", delete=False)
    try:
        with tmp:
            tmp.write(await file.read())
        result = count_chinese_and_english_characters_in_pdf(tmp.name)
    finally:
        # Always remove the stored upload, even if writing or parsing failed,
        # so uploads do not accumulate on disk.
        os.unlink(tmp.name)

    return templates.TemplateResponse("upload_form.html", {
        "request": request,
        "file_name": file_name,
        "page_counts": result["page_counts"],
        "total_chinese": result["total_chinese_chars"],
        "total_english": result["total_english_chars"],
        "total_chars": result["total_chars"],
    })