Spaces:
Sleeping
Sleeping
| import streamlit as st | |
| import PyPDF2 | |
| import pandas as pd | |
| import os | |
| import google.generativeai as genai | |
| import csv | |
| from datetime import datetime | |
| import logging | |
| import sys | |
| import io | |
| import tempfile | |
| import base64 | |
# Configure logging: every record goes both to a log file and to stdout.
logging.basicConfig(
    level=logging.INFO,
    format='%(asctime)s - %(levelname)s - %(message)s',
    handlers=[
        # Explicit UTF-8: the messages this app logs contain Chinese text, and
        # the platform default encoding is not guaranteed to be able to encode it.
        logging.FileHandler("pdf_processing.log", encoding="utf-8"),
        logging.StreamHandler(sys.stdout),
    ],
)
logger = logging.getLogger(__name__)
# Page-level configuration, followed by the app title and a one-line intro.
page_settings = {
    "page_title": "PDF處理與Gemini翻譯工具",
    "page_icon": "📄",
    "layout": "wide",
    "initial_sidebar_state": "expanded",
}
st.set_page_config(**page_settings)

# Application heading and short description of what the tool does.
st.title("📄 PDF處理與Gemini翻譯工具")
st.markdown("上傳PDF檔案,選擇要處理的頁面,讓Gemini解釋內容並翻譯成繁體中文。")
# Sidebar - settings area.
# Produces the names the rest of the script reads: api_key, uploaded_file,
# page_to_process, instruction and output_filename.
with st.sidebar:
    st.header("設定")

    # API key input. No key is embedded in the source: a key committed to a
    # shared file is readable by anyone and must be treated as compromised
    # (any key that was previously hard-coded here should be revoked).
    # Users type their own key; when the field is left empty we fall back to an
    # operator-provided GEMINI_API_KEY environment variable, which is used
    # server-side only and never placed into the widget.
    typed_api_key = st.text_input(
        "Gemini API金鑰",
        value="",
        type="password",
    )
    api_key = typed_api_key.strip() or os.environ.get("GEMINI_API_KEY", "")

    # PDF upload
    uploaded_file = st.file_uploader("上傳PDF檔案", type=["pdf"])

    # Session state caches the parsed PDF across script reruns.
    if 'total_pages' not in st.session_state:
        st.session_state.total_pages = 0
    if 'page_content' not in st.session_state:
        st.session_state.page_content = {}
    if 'loaded_pdf_id' not in st.session_state:
        st.session_state.loaded_pdf_id = None

    # Defined unconditionally so later code can rely on it even when the PDF
    # cannot be parsed or has a single page (no slider is drawn in those cases).
    page_to_process = 1

    # Processing options
    with st.expander("處理選項", expanded=True):
        if uploaded_file is not None:
            try:
                # Identify the upload by name and size so that replacing the
                # file invalidates the cached text; otherwise pages of the
                # previously uploaded PDF would keep being shown.
                pdf_id = (uploaded_file.name, uploaded_file.size)
                if st.session_state.loaded_pdf_id != pdf_id:
                    with st.spinner("正在加載PDF..."):
                        pdf_reader = PyPDF2.PdfReader(uploaded_file)
                        # Pages are keyed 1..N; a page without extractable
                        # text is stored as an empty string rather than None.
                        st.session_state.page_content = {
                            page_no: (page.extract_text() or "")
                            for page_no, page in enumerate(pdf_reader.pages, start=1)
                        }
                    st.session_state.total_pages = len(st.session_state.page_content)
                    st.session_state.loaded_pdf_id = pdf_id
            except Exception as e:
                # Drop any stale cache so a failed upload never shows old text.
                st.session_state.page_content = {}
                st.session_state.total_pages = 0
                st.session_state.loaded_pdf_id = None
                st.error(f"無法讀取PDF: {str(e)}")
            else:
                total_pages = st.session_state.total_pages
                # A slider needs a real range; with zero or one page the
                # bounds would be inverted or identical, so keep page 1.
                if total_pages > 1:
                    page_to_process = st.slider(
                        "選擇要處理的頁面",
                        min_value=1,
                        max_value=total_pages,
                        value=1,
                    )
                st.info(f"PDF共有 {total_pages} 頁")
        else:
            st.info("請先上傳PDF檔案")

    # Advanced options
    with st.expander("進階選項"):
        # Instruction text that is prepended to the page text sent to Gemini
        instruction = st.text_area(
            "給Gemini的指示詞",
            value="請詳細解釋以下內容的主要要點和重要信息",
            height=100,
        )
        # Name of the CSV file offered for download
        output_filename = st.text_input(
            "輸出CSV檔名",
            value="gemini_translated_results.csv",
        )
| # 主要功能函數 | |
def setup_gemini_api(api_key, model_name="gemini-1.5-flash"):
    """Configure the Gemini client and return a generative model.

    Args:
        api_key: Gemini API key to authenticate with.
        model_name: Name of the Gemini model to instantiate. Defaults to
            "gemini-1.5-flash", the model this function has always returned.

    Returns:
        A ``genai.GenerativeModel`` instance, or ``None`` when the key is
        blank or configuration fails. In both failure cases the problem is
        logged and shown in the Streamlit UI.
    """
    # Reject a blank key up front instead of deferring the failure to the
    # first generate_content() call.
    cleaned_key = (api_key or "").strip()
    if not cleaned_key:
        logger.error("Gemini API設置失敗: empty API key")
        st.error("API設置失敗: 未提供API金鑰")
        return None

    try:
        # The key is handed to the client library only. It is deliberately not
        # copied into os.environ: environment variables are process-wide, so a
        # key stored there would outlive this call and be visible to all other
        # code (and sessions) running in the same process.
        genai.configure(api_key=cleaned_key)
        return genai.GenerativeModel(model_name)
    except Exception as e:
        logger.error(f"Gemini API設置失敗: {e}")
        st.error(f"API設置失敗: {str(e)}")
        return None
def process_with_gemini(model, text, instruction):
    """Ask Gemini to act on *text* according to *instruction*.

    The prompt is the instruction, a colon, a blank line, then the text.

    Returns:
        The model's reply with surrounding whitespace removed, or a string
        starting with "處理失敗: " describing the error if anything raised.
    """
    try:
        reply = model.generate_content("{}:\n\n{}".format(instruction, text))
        answer = reply.text.strip()
    except Exception as err:
        # Failures are reported through the return value, not re-raised.
        logger.error(f"Gemini處理失敗: {err}")
        return f"處理失敗: {str(err)}"
    return answer
def translate_with_gemini(model, text):
    """Ask Gemini to translate *text* into Traditional Chinese.

    Returns:
        The model's reply with surrounding whitespace removed, or a string
        starting with "翻譯失敗: " describing the error if anything raised.
    """
    try:
        # Prompt layout: request line, the text to translate, then a line
        # telling the model to return only the translation.
        request = (
            "\n請將以下文本翻譯成繁體中文,保持專業和準確性:\n"
            f"{text}\n"
            "只需要返回翻譯後的文本,不要加入其他解釋或備註。\n"
        )
        reply = model.generate_content(request)
        translated = reply.text.strip()
    except Exception as err:
        # Failures are reported through the return value, not re-raised.
        logger.error(f"Gemini翻譯失敗: {err}")
        return f"翻譯失敗: {str(err)}"
    return translated
def get_csv_download_link(df, filename="data.csv"):
    """Build an HTML ``<a>`` tag that downloads *df* as a CSV file.

    The CSV (without the index column) is embedded in the link itself as a
    base64 ``data:`` URL, so no file has to exist on the server.

    Args:
        df: DataFrame to serialise.
        filename: File name suggested to the browser for the download.

    Returns:
        The anchor element as an HTML string.
    """
    import html  # local import: only this helper needs it

    csv_text = df.to_csv(index=False)
    # utf-8-sig prepends a BOM so spreadsheet tools such as Excel recognise the
    # file as UTF-8 instead of garbling non-ASCII (e.g. Chinese) cells.
    payload = base64.b64encode(csv_text.encode("utf-8-sig")).decode("ascii")
    # The file name is free text typed by the user and ends up inside an HTML
    # attribute, so it must be escaped (quotes included) to prevent it from
    # breaking out of the attribute and injecting markup.
    safe_name = html.escape(str(filename), quote=True)
    return (
        f'<a href="data:text/csv;charset=utf-8;base64,{payload}" '
        f'download="{safe_name}">下載 CSV 檔案</a>'
    )
# Main content area
def _safe_csv_filename(raw_name, default="gemini_translated_results.csv"):
    """Reduce a user-typed file name to a bare ``*.csv`` base name."""
    # Strip any directory component (either separator style) so the name can
    # never point outside the working directory.
    base = os.path.basename(str(raw_name).strip().replace("\\", "/"))
    if base in ("", ".", ".."):
        base = default
    # Forcing the extension keeps the write from clobbering non-CSV files
    # (for example the application's own source).
    if not base.lower().endswith(".csv"):
        base += ".csv"
    return base


if uploaded_file is not None:
    # Preview of the selected page
    st.header("頁面內容預覽")

    # None means the page is not in the cache (e.g. the PDF failed to parse).
    page_text = st.session_state.page_content.get(page_to_process)
    if page_text is not None:
        st.text_area(
            f"第 {page_to_process} 頁內容",
            value=page_text,
            height=150,
            disabled=True,
        )
    else:
        st.warning("無法獲取選定頁面的內容")

    # Process button
    process_button = st.button("處理並翻譯", type="primary", use_container_width=True)

    if process_button:
        if not api_key:
            st.error("請輸入Gemini API金鑰!")
        elif page_text is None:
            # Nothing to send to Gemini; avoid a KeyError on the page cache.
            st.warning("無法獲取選定頁面的內容")
        else:
            safe_filename = _safe_csv_filename(output_filename)

            # Separate slots for the bar and the status text: writing both into
            # one st.empty() placeholder makes each overwrite the other.
            progress_placeholder = st.empty()
            status_placeholder = st.empty()

            failure = None      # error text returned by a Gemini helper, if any
            results_df = None   # set only when both steps succeeded

            with st.spinner("正在處理中..."):
                progress_bar = progress_placeholder.progress(0)

                # setup_gemini_api reports its own errors and returns None.
                model = setup_gemini_api(api_key)
                if model:
                    progress_bar.progress(20)

                    status_placeholder.text("正在使用Gemini解釋內容...")
                    explanation = process_with_gemini(model, page_text, instruction)

                    # The helpers signal failure through a prefixed return
                    # string; do not translate an error or report it as success.
                    if explanation.startswith("處理失敗:"):
                        failure = explanation
                    else:
                        progress_bar.progress(60)
                        status_placeholder.text("正在翻譯成繁體中文...")
                        translation = translate_with_gemini(model, explanation)
                        if translation.startswith("翻譯失敗:"):
                            failure = translation

                    if failure is None:
                        progress_bar.progress(90)

                        # Long page text is truncated to 5000 characters in the CSV.
                        original_cell = (
                            page_text[:5000] + "..." if len(page_text) > 5000 else page_text
                        )
                        results_df = pd.DataFrame({
                            "時間戳記": [datetime.now().isoformat()],
                            "原始內容": [original_cell],
                            "Gemini解釋": [explanation],
                            "繁體中文翻譯": [translation],
                        })

                        # Best-effort local copy; the download link below is
                        # built from memory and does not depend on this file.
                        try:
                            results_df.to_csv(safe_filename, index=False, encoding="utf-8-sig")
                        except OSError as e:
                            logger.warning(f"Could not save CSV to {safe_filename}: {e}")

                        progress_bar.progress(100)

            progress_placeholder.empty()
            status_placeholder.empty()

            if failure is not None:
                st.error(failure)
            elif results_df is not None:
                st.success("處理完成!")

                # Result tabs
                tab1, tab2, tab3 = st.tabs(["Gemini解釋", "繁體中文翻譯", "CSV資料"])

                with tab1:
                    st.subheader("Gemini解釋結果")
                    st.write(explanation)

                with tab2:
                    st.subheader("繁體中文翻譯")
                    st.write(translation)

                with tab3:
                    st.subheader("CSV資料預覽")
                    st.dataframe(results_df)
                    st.markdown(
                        get_csv_download_link(results_df, safe_filename),
                        unsafe_allow_html=True,
                    )
                    st.info(f"CSV檔案已準備好下載。檔名: {safe_filename}")
else:
    # Shown while no file has been uploaded
    st.info("👈 請從側邊欄上傳PDF檔案開始")

    # Usage instructions
    with st.expander("使用說明", expanded=True):
        st.markdown("""
### 如何使用這個工具:
1. **上傳PDF檔案** - 從側邊欄選擇並上傳PDF檔案
2. **選擇頁面** - 使用滑桿選擇要處理的頁面
3. **設定API金鑰** - 輸入您的Gemini API金鑰(預設已填入,可修改)
4. **自訂指示詞** - 可選擇修改給Gemini的指示詞
5. **處理與翻譯** - 點擊"處理並翻譯"按鈕
6. **查看結果** - 在選項卡中查看Gemini的解釋和繁體中文翻譯
7. **下載結果** - 下載CSV格式的結果檔案
### 功能特點:
- 逐頁預覽PDF內容
- 使用Gemini AI解釋文本
- 自動翻譯成繁體中文
- 結果以CSV格式儲存
""")
# Footer: a horizontal rule followed by the credits line.
for footer_line in (
    "---",
    "📄 PDF處理與Gemini翻譯工具 | 由Streamlit和Google Gemini AI提供技術支持",
):
    st.markdown(footer_line)