Spaces:
Sleeping
Sleeping
File size: 10,406 Bytes
f3a45af de02f53 087ebab de02f53 087ebab de02f53 087ebab de02f53 087ebab de02f53 087ebab de02f53 087ebab de02f53 087ebab de02f53 087ebab de02f53 087ebab de02f53 087ebab de02f53 087ebab de02f53 087ebab de02f53 087ebab de02f53 087ebab de02f53 087ebab de02f53 087ebab de02f53 087ebab de02f53 087ebab de02f53 087ebab de02f53 087ebab de02f53 087ebab de02f53 087ebab de02f53 087ebab de02f53 087ebab de02f53 087ebab de02f53 087ebab de02f53 087ebab de02f53 087ebab de02f53 087ebab de02f53 087ebab de02f53 087ebab de02f53 087ebab de02f53 087ebab de02f53 087ebab de02f53 087ebab de02f53 087ebab de02f53 087ebab de02f53 087ebab de02f53 087ebab de02f53 f3a45af 087ebab de02f53 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 |
import streamlit as st
import PyPDF2
import pandas as pd
import os
import google.generativeai as genai
from datetime import datetime
import logging
import sys
import base64
import tempfile
# Log files are kept under /tmp, a writable location for the app.
log_dir = "/tmp/logs"
os.makedirs(log_dir, exist_ok=True)

# Every record goes both to a log file and to stdout.
_log_file_path = os.path.join(log_dir, "pdf_processing.log")
_log_handlers = [
    logging.FileHandler(_log_file_path),
    logging.StreamHandler(sys.stdout),
]
logging.basicConfig(
    level=logging.INFO,
    format='%(asctime)s - %(levelname)s - %(message)s',
    handlers=_log_handlers,
)

# Module-level logger used by the rest of the script.
logger = logging.getLogger(__name__)
# Page configuration: the same title is used for the browser tab and,
# prefixed with the icon, for the on-page heading.
_APP_TITLE = "PDF處理與Gemini翻譯工具"
_page_settings = {
    "page_title": _APP_TITLE,
    "page_icon": "📄",
    "layout": "wide",
    "initial_sidebar_state": "expanded",
}
st.set_page_config(**_page_settings)

# App title and one-line introduction
st.title(f"📄 {_APP_TITLE}")
st.markdown("上傳PDF檔案,選擇要處理的頁面,讓Gemini解釋內容並翻譯成繁體中文。")
# Sidebar - settings area.
# Names bound here and read by the main area: api_key, uploaded_file,
# page_to_process, instruction, output_filename, plus
# st.session_state.page_content / total_pages.
with st.sidebar:
    st.header("設定")

    # The API key is typed in by the user (never hard-coded). st.secrets
    # would be a more secure source but requires deployment setup.
    api_key = st.text_input(
        "Gemini API金鑰",
        value="",
        type="password",
    )

    # Upload PDF file
    uploaded_file = st.file_uploader("上傳PDF檔案", type=["pdf"])

    # Processing options block
    with st.expander("處理選項", expanded=True):
        # session_state keeps the extracted text between script runs.
        if 'total_pages' not in st.session_state:
            st.session_state.total_pages = 0
        if 'page_content' not in st.session_state:
            st.session_state.page_content = {}
        # Identity of the PDF whose text is currently cached (None = nothing).
        if 'loaded_file_id' not in st.session_state:
            st.session_state.loaded_file_id = None

        # Bound unconditionally so the main area can always read it, even
        # when loading the PDF fails below.
        page_to_process = 1

        if uploaded_file is not None:
            try:
                pdf_bytes = uploaded_file.getvalue()
                # Name + size + content hash: a different upload invalidates
                # the cache, while an unchanged upload is not parsed again.
                file_id = (uploaded_file.name, len(pdf_bytes), hash(pdf_bytes))

                if st.session_state.loaded_file_id != file_id:
                    # Drop whatever belonged to a previously uploaded file.
                    st.session_state.loaded_file_id = None
                    st.session_state.page_content = {}
                    st.session_state.total_pages = 0

                    tmp_path = None
                    try:
                        # The reader is handed a path on disk rather than the
                        # upload object itself.
                        with tempfile.NamedTemporaryFile(delete=False, suffix='.pdf') as tmp_file:
                            tmp_path = tmp_file.name
                            tmp_file.write(pdf_bytes)

                        with st.spinner("正在加載PDF..."):
                            pdf_reader = PyPDF2.PdfReader(tmp_path)
                            # Pages are keyed 1..N to match the page selector.
                            # A page without extractable text becomes "".
                            extracted_pages = {
                                page_number: page.extract_text() or ""
                                for page_number, page in enumerate(pdf_reader.pages, start=1)
                            }
                    finally:
                        # Remove the temporary file even when parsing failed.
                        if tmp_path is not None and os.path.exists(tmp_path):
                            os.unlink(tmp_path)

                    st.session_state.page_content = extracted_pages
                    st.session_state.total_pages = len(extracted_pages)
                    st.session_state.loaded_file_id = file_id

                total_pages = st.session_state.total_pages
                # Only offer a slider when there is a real range to choose
                # from; with 0 or 1 pages the default of 1 is kept.
                # NOTE(review): Streamlit appears to reject min_value ==
                # max_value for sliders - confirm against the installed version.
                if total_pages > 1:
                    page_to_process = st.slider(
                        "選擇要處理的頁面",
                        min_value=1,
                        max_value=total_pages,
                        value=1,
                    )
                st.info(f"PDF共有 {total_pages} 頁")
            except Exception as e:
                logger.error(f"無法讀取PDF: {str(e)}")
                st.error(f"無法讀取PDF: {str(e)}")
        else:
            st.info("請先上傳PDF檔案")

    # Advanced options
    with st.expander("進階選項"):
        # Instruction that is prepended to the page text sent to Gemini
        instruction = st.text_area(
            "給Gemini的指示詞",
            value="請詳細解釋以下內容的主要要點和重要信息",
            height=100,
        )
        # Output filename for the CSV export
        output_filename = st.text_input(
            "輸出CSV檔名",
            value="gemini_translated_results.csv",
        )
# Main function definitions
def setup_gemini_api(api_key, model_name="gemini-1.5-flash"):
    """Configure the Gemini client and return a generative model.

    Args:
        api_key: Gemini API key. Surrounding whitespace is ignored.
        model_name: Name of the model to instantiate. Defaults to the model
            this app has always used, so existing callers are unaffected.

    Returns:
        A ``genai.GenerativeModel`` instance, or ``None`` when the key is
        blank or configuration raises. In the failure case the error is
        logged and shown in the Streamlit UI.
    """
    try:
        cleaned_key = (api_key or "").strip()
        if not cleaned_key:
            # Fail here rather than on the first request made with a blank key.
            raise ValueError("未提供API金鑰")
        genai.configure(api_key=cleaned_key)
        return genai.GenerativeModel(model_name)
    except Exception as e:
        logger.error(f"Gemini API設置失敗: {e}")
        st.error(f"API設置失敗: {str(e)}")
        return None
def process_with_gemini(model, text, instruction):
    """Ask Gemini to process *text* according to *instruction*.

    Args:
        model: Object exposing ``generate_content(prompt)`` whose result has
            a ``text`` attribute.
        text: The content to process (e.g. the text of one PDF page).
        instruction: Instruction placed before the content in the prompt.

    Returns:
        The response text with surrounding whitespace stripped. Failures are
        reported in-band: the return value is then a string starting with
        "處理失敗: " and no exception is raised.
    """
    # Nothing to send (e.g. a page with no extractable text): skip the API
    # call instead of asking the model to explain an empty document.
    if not text or not text.strip():
        return "處理失敗: 沒有可處理的文字內容"

    try:
        prompt = f"{instruction}:\n\n{text}"
        response = model.generate_content(prompt)
        return response.text.strip()
    except Exception as e:
        logger.error(f"Gemini處理失敗: {e}")
        return f"處理失敗: {str(e)}"
def translate_with_gemini(model, text):
    """Translate *text* into Traditional Chinese using Gemini.

    Args:
        model: Object exposing ``generate_content(prompt)`` whose result has
            a ``text`` attribute.
        text: The text to translate.

    Returns:
        The translated text with surrounding whitespace stripped. Failures
        are reported in-band: the return value is then a string starting
        with "翻譯失敗: " and no exception is raised.
    """
    # Nothing to translate: skip the API call entirely.
    if not text or not text.strip():
        return "翻譯失敗: 沒有可翻譯的文字內容"

    try:
        # Prompt: translation request, the text itself, then a reminder to
        # return only the translation without extra commentary.
        prompt = (
            "\n"
            "請將以下文本翻譯成繁體中文,保持專業和準確性:\n"
            f"{text}\n"
            "只需要返回翻譯後的文本,不要加入其他解釋或備註。\n"
        )
        response = model.generate_content(prompt)
        return response.text.strip()
    except Exception as e:
        logger.error(f"Gemini翻譯失敗: {e}")
        return f"翻譯失敗: {str(e)}"
def get_csv_download_link(df, filename="data.csv"):
    """Build an HTML anchor that downloads *df* as a CSV file.

    The CSV is embedded in the link as a base64 ``data:`` URI, so no file on
    disk is needed.

    Args:
        df: DataFrame to export (written without the index column).
        filename: Name suggested to the browser for the downloaded file.

    Returns:
        An ``<a>`` element as an HTML string.
    """
    import html  # local import: only needed to escape the attribute value

    csv_text = df.to_csv(index=False)
    # utf-8-sig adds a BOM, matching the encoding used when the same data is
    # saved to disk, so spreadsheet tools detect UTF-8 for the CJK columns.
    payload = base64.b64encode(csv_text.encode("utf-8-sig")).decode("ascii")
    # The filename comes from user input and lands inside an HTML attribute.
    safe_filename = html.escape(filename, quote=True)
    return (
        f'<a href="data:text/csv;charset=utf-8;base64,{payload}" '
        f'download="{safe_filename}">下載 CSV 檔案</a>'
    )
# Main content area.
# Reads api_key, uploaded_file, page_to_process, instruction and
# output_filename from the sidebar, and the extracted page text from
# st.session_state.page_content.

# Usage instructions shown while no file is uploaded.
_USAGE_MARKDOWN = """
### 如何使用這個工具:
1. **上傳PDF檔案** - 從側邊欄選擇並上傳PDF檔案
2. **選擇頁面** - 使用滑桿選擇要處理的頁面
3. **設定API金鑰** - 輸入您的Gemini API金鑰
4. **自訂指示詞** - 可選擇修改給Gemini的指示詞
5. **處理與翻譯** - 點擊"處理並翻譯"按鈕
6. **查看結果** - 在選項卡中查看Gemini的解釋和繁體中文翻譯
7. **下載結果** - 下載CSV格式的結果檔案
### 功能特點:
- 逐頁預覽PDF內容
- 使用Gemini AI解釋文本
- 自動翻譯成繁體中文
- 結果以CSV格式儲存
"""

if uploaded_file is not None:
    # Display page content preview
    st.header("頁面內容預覽")

    # None when the selected page was never extracted (e.g. the PDF could
    # not be read); looked up once and reused by the processing step.
    page_text = st.session_state.page_content.get(page_to_process)
    if page_text is not None:
        st.text_area(
            f"第 {page_to_process} 頁內容",
            value=page_text,
            height=150,
            disabled=True,
        )
    else:
        st.warning("無法獲取選定頁面的內容")

    # Process button
    process_button = st.button("處理並翻譯", type="primary", use_container_width=True)

    if process_button:
        if not api_key:
            st.error("請輸入Gemini API金鑰!")
        elif not page_text or not page_text.strip():
            # Missing or blank page: there is nothing to send to Gemini.
            st.error("無法獲取選定頁面的內容")
        else:
            # Separate slots for the bar and the status line, so writing a
            # status message does not overwrite the progress bar.
            progress_placeholder = st.empty()
            status_placeholder = st.empty()
            results_df = None
            safe_filename = None

            with st.spinner("正在處理中..."):
                progress_bar = progress_placeholder.progress(0)

                # Set up API (reports its own error and returns None on failure)
                model = setup_gemini_api(api_key)
                if model is not None:
                    progress_bar.progress(20)

                    # Step 1: explanation of the selected page
                    status_placeholder.text("正在使用Gemini解釋內容...")
                    explanation = process_with_gemini(model, page_text, instruction)
                    progress_bar.progress(60)

                    # Step 2: translation of the explanation
                    status_placeholder.text("正在翻譯成繁體中文...")
                    translation = translate_with_gemini(model, explanation)
                    progress_bar.progress(90)

                    # One-row results table; long page text is truncated to
                    # 5000 characters with a trailing ellipsis.
                    if len(page_text) > 5000:
                        original_excerpt = page_text[:5000] + "..."
                    else:
                        original_excerpt = page_text
                    results_df = pd.DataFrame({
                        "時間戳記": [datetime.now().isoformat()],
                        "原始內容": [original_excerpt],
                        "Gemini解釋": [explanation],
                        "繁體中文翻譯": [translation],
                    })

                    # The filename is user input: keep only its final path
                    # component so the file cannot be written outside /tmp,
                    # and fall back to the default name when it is blank.
                    safe_filename = (
                        os.path.basename(output_filename.strip())
                        or "gemini_translated_results.csv"
                    )
                    csv_path = os.path.join("/tmp", safe_filename)
                    try:
                        results_df.to_csv(csv_path, index=False, encoding="utf-8-sig")
                        logger.info(f"CSV saved to {csv_path}")
                    except Exception as e:
                        logger.error(f"Failed to save CSV: {e}")
                        st.error(f"無法保存CSV: {str(e)}")

                    progress_bar.progress(100)

            # Clear the progress widgets whether or not the API setup worked.
            progress_placeholder.empty()
            status_placeholder.empty()

            if results_df is not None:
                st.success("處理完成!")

                # Tabs: explanation, translation, and the CSV row + download
                tab1, tab2, tab3 = st.tabs(["Gemini解釋", "繁體中文翻譯", "CSV資料"])
                with tab1:
                    st.subheader("Gemini解釋結果")
                    st.write(explanation)
                with tab2:
                    st.subheader("繁體中文翻譯")
                    st.write(translation)
                with tab3:
                    st.subheader("CSV資料預覽")
                    st.dataframe(results_df)
                    # The link embeds the CSV itself, so it works even if
                    # saving to disk failed above.
                    st.markdown(
                        get_csv_download_link(results_df, safe_filename),
                        unsafe_allow_html=True,
                    )
                    st.info(f"CSV檔案已準備好下載。檔名: {safe_filename}")
else:
    # Content to display when no file is uploaded
    st.info("👈 請從側邊欄上傳PDF檔案開始")
    with st.expander("使用說明", expanded=True):
        st.markdown(_USAGE_MARKDOWN)
# Footer: horizontal rule followed by the credits line, shown on every run.
st.markdown("---")
st.markdown("📄 PDF處理與Gemini翻譯工具 | 由Streamlit和Google Gemini AI提供技術支持")