import os
import re
import tempfile

import pdfplumber
from fastapi import FastAPI, File, Request, UploadFile
from fastapi.responses import HTMLResponse
from fastapi.templating import Jinja2Templates

app = FastAPI()

# Directory that holds the HTML templates.
templates = Jinja2Templates(directory="templates")

# Matches a single character in the range U+4E00..U+9FFF.
chinese_char_pattern = re.compile(r'[\u4e00-\u9fff]')

# Matches a single ASCII letter.
english_char_pattern = re.compile(r'[A-Za-z]')


def count_chinese_and_english_characters_in_pdf(file_path: str) -> dict:
    """Count Chinese and English characters on each page of a PDF.

    Args:
        file_path: Value handed to ``pdfplumber.open`` to open the PDF.

    Returns:
        A dict with these keys:

        * ``"page_counts"``: list of dicts, one per page that yielded text,
          each with ``"page_num"`` (1-based), ``"chinese_count"`` and
          ``"english_count"``.
        * ``"total_chinese_chars"``: sum of the per-page Chinese counts.
        * ``"total_english_chars"``: sum of the per-page English counts.
        * ``"total_chars"``: sum of the two totals.
    """
    total_chinese_chars = 0
    total_english_chars = 0
    page_counts = []

    with pdfplumber.open(file_path) as pdf:
        for page_num, page in enumerate(pdf.pages, start=1):
            text = page.extract_text()
            # Pages for which no text is extracted are left out of the
            # per-page list entirely (they contribute nothing to the totals).
            if not text:
                continue

            chinese_char_count = len(chinese_char_pattern.findall(text))
            english_char_count = len(english_char_pattern.findall(text))

            total_chinese_chars += chinese_char_count
            total_english_chars += english_char_count

            page_counts.append({
                "page_num": page_num,
                "chinese_count": chinese_char_count,
                "english_count": english_char_count,
            })

    return {
        "page_counts": page_counts,
        "total_chinese_chars": total_chinese_chars,
        "total_english_chars": total_english_chars,
        "total_chars": total_chinese_chars + total_english_chars,
    }


@app.get("/", response_class=HTMLResponse)
async def show_form(request: Request):
    """Render the upload form page."""
    return templates.TemplateResponse("upload_form.html", {"request": request})


@app.post("/uploadfile/")
async def upload_file(request: Request, file: UploadFile = File(...)):
    """Accept an uploaded PDF, count its characters and render the result.

    Uploads whose content type is not ``application/pdf`` get a dict with an
    ``"error"`` message instead of the rendered page.

    Args:
        request: The incoming request, passed through to the template.
        file: The uploaded file.

    Returns:
        The ``upload_form.html`` template response populated with the file
        name, per-page counts and totals, or an error dict for non-PDF uploads.
    """
    if file.content_type != "application/pdf":
        return {"error": "只接受 PDF 文件"}

    # The client-supplied name is used for display only. It must never be used
    # as a filesystem path: a name such as "../../main.py" would otherwise let
    # the uploader overwrite arbitrary files.
    file_name = file.filename

    # Store the upload under a fixed name inside a private temporary directory
    # so that it cannot collide with other files and is always removed once
    # counting is done (even if parsing raises).
    with tempfile.TemporaryDirectory() as tmp_dir:
        tmp_path = os.path.join(tmp_dir, "upload.pdf")
        with open(tmp_path, "wb") as f:
            f.write(await file.read())

        result = count_chinese_and_english_characters_in_pdf(tmp_path)

    return templates.TemplateResponse("upload_form.html", {
        "request": request,
        "file_name": file_name,
        "page_counts": result["page_counts"],
        "total_chinese": result["total_chinese_chars"],
        "total_english": result["total_english_chars"],
        "total_chars": result["total_chars"],
    })