# Spaces: Sleeping
import base64
import html
import logging
import os
import sys
import tempfile
from datetime import datetime

import google.generativeai as genai
import pandas as pd
import PyPDF2
import streamlit as st
# Create logs directory in a writable location
log_dir = "/tmp/logs"
os.makedirs(log_dir, exist_ok=True)

# Configure logging: every record goes to both the log file and stdout.
# The file handler is pinned to UTF-8 because this app logs Chinese messages;
# without an explicit encoding the file is opened with the locale default,
# which is not guaranteed to be able to encode them.
logging.basicConfig(
    level=logging.INFO,
    format='%(asctime)s - %(levelname)s - %(message)s',
    handlers=[
        logging.FileHandler(
            os.path.join(log_dir, "pdf_processing.log"),
            encoding="utf-8",
        ),
        logging.StreamHandler(sys.stdout),
    ],
)
logger = logging.getLogger(__name__)
# Page configuration: browser-tab title/icon plus the overall layout.
page_settings = dict(
    page_title="PDF處理與Gemini翻譯工具",
    page_icon="📄",
    layout="wide",
    initial_sidebar_state="expanded",
)
st.set_page_config(**page_settings)

# App title and a one-line introduction shown at the top of the page.
st.title("📄 PDF處理與Gemini翻譯工具")
st.markdown("上傳PDF檔案,選擇要處理的頁面,讓Gemini解釋內容並翻譯成繁體中文。")
# Sidebar - Settings area
with st.sidebar:
    st.header("設定")

    # The API key is always typed in by the user; nothing is hard-coded.
    api_key = st.text_input(
        "Gemini API金鑰",
        value="",
        type="password",
    )

    # Upload PDF file
    uploaded_file = st.file_uploader("上傳PDF檔案", type=["pdf"])

    # Processing options block
    with st.expander("處理選項", expanded=True):
        # Per-session cache of the parsed PDF so reruns do not re-parse it.
        if 'total_pages' not in st.session_state:
            st.session_state.total_pages = 0
        if 'page_content' not in st.session_state:
            st.session_state.page_content = {}
        # Fingerprint of the PDF the cache was built from (None = no cache).
        if 'pdf_signature' not in st.session_state:
            st.session_state.pdf_signature = None

        # Bound unconditionally so the main area can always reference it,
        # even when no file is uploaded or the PDF cannot be read.
        page_to_process = 1

        if uploaded_file is not None:
            try:
                pdf_bytes = uploaded_file.getvalue()
                # hash() is only stable within one process, which is enough
                # here because the value is compared within this session only.
                pdf_signature = (
                    uploaded_file.name,
                    len(pdf_bytes),
                    hash(pdf_bytes),
                )

                # Re-parse only when a different file has been uploaded;
                # otherwise the cached pages would belong to the previous PDF.
                if st.session_state.pdf_signature != pdf_signature:
                    with st.spinner("正在加載PDF..."):
                        # Parse from a temporary copy on disk rather than
                        # from the upload object directly.
                        with tempfile.NamedTemporaryFile(
                            delete=False, suffix='.pdf'
                        ) as tmp_file:
                            tmp_file.write(pdf_bytes)
                            tmp_path = tmp_file.name
                        try:
                            pdf_reader = PyPDF2.PdfReader(tmp_path)
                            # Pages are keyed 1-based to match the slider.
                            # "or ''" keeps every value a str even if
                            # extraction yields nothing for a page.
                            loaded_pages = {
                                number: page.extract_text() or ""
                                for number, page in enumerate(
                                    pdf_reader.pages, start=1
                                )
                            }
                        finally:
                            # Remove the temporary file even if parsing failed.
                            os.unlink(tmp_path)

                    st.session_state.page_content = loaded_pages
                    st.session_state.total_pages = len(loaded_pages)
                    st.session_state.pdf_signature = pdf_signature
            except Exception as e:
                # UI boundary: any parsing problem is reported, not raised.
                # Drop the cache so stale pages from an earlier PDF are not
                # shown for a file that could not be read.
                st.session_state.page_content = {}
                st.session_state.total_pages = 0
                st.session_state.pdf_signature = None
                logger.error("無法讀取PDF: %s", e)
                st.error(f"無法讀取PDF: {str(e)}")
            else:
                total_pages = st.session_state.total_pages
                # With zero or one page there is nothing to choose between,
                # so the slider is skipped and page 1 stays selected.
                if total_pages > 1:
                    page_to_process = st.slider(
                        "選擇要處理的頁面",
                        min_value=1,
                        max_value=total_pages,
                        value=1,
                    )
                st.info(f"PDF共有 {total_pages} 頁")
        else:
            st.info("請先上傳PDF檔案")

    # Advanced options
    with st.expander("進階選項"):
        # Instructions for Gemini
        instruction = st.text_area(
            "給Gemini的指示詞",
            value="請詳細解釋以下內容的主要要點和重要信息",
            height=100,
        )
        # Output filename
        output_filename = st.text_input(
            "輸出CSV檔名",
            value="gemini_translated_results.csv",
        )
# Main function definitions
def setup_gemini_api(api_key, model_name="gemini-1.5-flash"):
    """Configure the Gemini client and return a model handle.

    Args:
        api_key: API key passed to ``genai.configure``.
        model_name: Name of the model to instantiate. The default is the
            model this function previously hard-coded, so existing callers
            are unaffected.

    Returns:
        A ``genai.GenerativeModel`` for ``model_name``, or ``None`` if
        configuration or construction raised. In the failure case the error
        is logged and also shown in the Streamlit UI.
    """
    try:
        genai.configure(api_key=api_key)
        return genai.GenerativeModel(model_name)
    except Exception as e:
        logger.error("Gemini API設置失敗: %s", e)
        st.error(f"API設置失敗: {str(e)}")
        return None
def process_with_gemini(model, text, instruction):
    """Ask the model to process ``text`` according to ``instruction``.

    Args:
        model: Object exposing ``generate_content(prompt)`` whose result has
            a ``text`` attribute.
        text: The content to be processed.
        instruction: Instruction placed in front of the content in the prompt.

    Returns:
        The model's reply with surrounding whitespace removed. On failure,
        or when ``text`` is empty, a message starting with ``"處理失敗: "``
        is returned instead of raising.
    """
    # A page with no extractable text gives the model nothing to explain,
    # so skip the API call instead of sending the bare instruction.
    if not text or not text.strip():
        return "處理失敗: 沒有可處理的文本內容"

    try:
        prompt = f"{instruction}:\n\n{text}"
        response = model.generate_content(prompt)
        return response.text.strip()
    except Exception as e:
        logger.error("Gemini處理失敗: %s", e)
        return f"處理失敗: {str(e)}"
def translate_with_gemini(model, text):
    """Ask the model to translate ``text`` into Traditional Chinese.

    Args:
        model: Object exposing ``generate_content(prompt)`` whose result has
            a ``text`` attribute.
        text: The text to translate.

    Returns:
        The model's reply with surrounding whitespace removed. On failure,
        or when ``text`` is empty, a message starting with ``"翻譯失敗: "``
        is returned instead of raising.
    """
    # Nothing to translate: skip the API call rather than sending a prompt
    # that contains only the translation instructions.
    if not text or not text.strip():
        return "翻譯失敗: 沒有可翻譯的文本內容"

    try:
        # Built line by line so the prompt carries no source-code indentation.
        prompt = (
            "\n"
            "請將以下文本翻譯成繁體中文,保持專業和準確性:\n"
            f"{text}\n"
            "只需要返回翻譯後的文本,不要加入其他解釋或備註。\n"
        )
        response = model.generate_content(prompt)
        return response.text.strip()
    except Exception as e:
        logger.error("Gemini翻譯失敗: %s", e)
        return f"翻譯失敗: {str(e)}"
def get_csv_download_link(df, filename="data.csv"):
    """Build an HTML anchor that downloads ``df`` as a CSV file.

    The CSV is embedded in the link itself as a base64 ``data:`` URI.

    Args:
        df: DataFrame to serialise (written without the index).
        filename: File name suggested to the browser. It is HTML-escaped
            because the returned markup is meant to be rendered as raw HTML.

    Returns:
        An ``<a>`` element as a string.
    """
    # utf-8-sig prepends a BOM, matching how the app saves its CSV to disk.
    csv_bytes = df.to_csv(index=False).encode("utf-8-sig")
    b64 = base64.b64encode(csv_bytes).decode("ascii")
    # Escape quotes and angle brackets so the name cannot break out of the
    # attribute it is placed in.
    safe_name = html.escape(filename, quote=True)
    return (
        f'<a href="data:text/csv;charset=utf-8;base64,{b64}" '
        f'download="{safe_name}">下載 CSV 檔案</a>'
    )
# Main content area
if uploaded_file is not None:
    # Display page content preview
    st.header("頁面內容預覽")

    # None means session_state holds no text for the selected page.
    page_text = st.session_state.page_content.get(page_to_process)
    if page_text is not None:
        st.text_area(
            f"第 {page_to_process} 頁內容",
            value=page_text,
            height=150,
            disabled=True,
        )
    else:
        st.warning("無法獲取選定頁面的內容")

    # Without page text there is nothing to send, so the button is disabled.
    process_button = st.button(
        "處理並翻譯",
        type="primary",
        use_container_width=True,
        disabled=page_text is None,
    )

    # When process button is clicked
    if process_button:
        if not api_key:
            st.error("請輸入Gemini API金鑰!")
        else:
            # The caption and the bar get separate placeholders: a placeholder
            # holds a single element, so writing the caption into the bar's
            # placeholder would replace the bar.
            status_placeholder = st.empty()
            progress_placeholder = st.empty()

            failure = None      # failure message returned by a helper, if any
            results_df = None   # set only when both steps succeeded
            explanation = None
            translation = None
            # Keep only the final path component so the CSV is always written
            # directly inside /tmp, whatever the user typed.
            download_name = (
                os.path.basename(output_filename)
                or "gemini_translated_results.csv"
            )

            with st.spinner("正在處理中..."):
                progress_bar = progress_placeholder.progress(0)

                # setup_gemini_api reports its own errors and returns None.
                model = setup_gemini_api(api_key)
                if model:
                    progress_bar.progress(20)

                    # Process with Gemini
                    status_placeholder.text("正在使用Gemini解釋內容...")
                    explanation = process_with_gemini(
                        model, page_text, instruction
                    )
                    progress_bar.progress(60)

                    # The helpers signal failure by returning a message with
                    # a fixed prefix rather than raising; detect it so an
                    # error is not reported as a success or translated.
                    if explanation.startswith("處理失敗: "):
                        failure = explanation
                    else:
                        # Translate to Traditional Chinese
                        status_placeholder.text("正在翻譯成繁體中文...")
                        translation = translate_with_gemini(model, explanation)
                        progress_bar.progress(90)
                        if translation.startswith("翻譯失敗: "):
                            failure = translation

                    if failure is None:
                        # Single-row table; long page text is truncated.
                        original_excerpt = (
                            page_text[:5000] + "..."
                            if len(page_text) > 5000
                            else page_text
                        )
                        results_df = pd.DataFrame({
                            "時間戳記": [datetime.now().isoformat()],
                            "原始內容": [original_excerpt],
                            "Gemini解釋": [explanation],
                            "繁體中文翻譯": [translation],
                        })

                        # Save as CSV to a writable location. A failure here
                        # is reported but does not block the in-page download.
                        csv_path = os.path.join("/tmp", download_name)
                        try:
                            results_df.to_csv(
                                csv_path, index=False, encoding="utf-8-sig"
                            )
                            logger.info(f"CSV saved to {csv_path}")
                        except Exception as e:
                            logger.error(f"Failed to save CSV: {e}")
                            st.error(f"無法保存CSV: {str(e)}")

                        progress_bar.progress(100)

            # Clear the progress widgets whatever the outcome.
            status_placeholder.empty()
            progress_placeholder.empty()

            if failure is not None:
                st.error(failure)
            elif results_df is not None:
                # Display results
                st.success("處理完成!")
                tab1, tab2, tab3 = st.tabs(
                    ["Gemini解釋", "繁體中文翻譯", "CSV資料"]
                )
                with tab1:
                    st.subheader("Gemini解釋結果")
                    st.write(explanation)
                with tab2:
                    st.subheader("繁體中文翻譯")
                    st.write(translation)
                with tab3:
                    st.subheader("CSV資料預覽")
                    st.dataframe(results_df)
                    st.markdown(
                        get_csv_download_link(results_df, download_name),
                        unsafe_allow_html=True,
                    )
                    st.info(f"CSV檔案已準備好下載。檔名: {download_name}")
else:
    # Content to display when no file is uploaded
    st.info("👈 請從側邊欄上傳PDF檔案開始")

    # Display usage instructions
    with st.expander("使用說明", expanded=True):
        st.markdown(
            "\n"
            "### 如何使用這個工具:\n"
            "1. **上傳PDF檔案** - 從側邊欄選擇並上傳PDF檔案\n"
            "2. **選擇頁面** - 使用滑桿選擇要處理的頁面\n"
            "3. **設定API金鑰** - 輸入您的Gemini API金鑰\n"
            "4. **自訂指示詞** - 可選擇修改給Gemini的指示詞\n"
            '5. **處理與翻譯** - 點擊"處理並翻譯"按鈕\n'
            "6. **查看結果** - 在選項卡中查看Gemini的解釋和繁體中文翻譯\n"
            "7. **下載結果** - 下載CSV格式的結果檔案\n"
            "### 功能特點:\n"
            "- 逐頁預覽PDF內容\n"
            "- 使用Gemini AI解釋文本\n"
            "- 自動翻譯成繁體中文\n"
            "- 結果以CSV格式儲存\n"
        )
# Footer: a horizontal rule followed by the attribution line.
for footer_line in (
    "---",
    "📄 PDF處理與Gemini翻譯工具 | 由Streamlit和Google Gemini AI提供技術支持",
):
    st.markdown(footer_line)