# Pdf_geminiAI / src/streamlit_app.py
# Roberta2024's picture
# Update src/streamlit_app.py
# 087ebab verified
import streamlit as st
import PyPDF2
import pandas as pd
import os
import google.generativeai as genai
from datetime import datetime
import logging
import sys
import base64
import tempfile
# Logs are written under /tmp/logs; the directory is created on startup if missing.
log_dir = "/tmp/logs"
os.makedirs(log_dir, exist_ok=True)

# Send every record both to a log file and to stdout.
log_file_path = os.path.join(log_dir, "pdf_processing.log")
log_handlers = [
    logging.FileHandler(log_file_path),
    logging.StreamHandler(sys.stdout),
]
logging.basicConfig(
    level=logging.INFO,
    format='%(asctime)s - %(levelname)s - %(message)s',
    handlers=log_handlers,
)

# Module-level logger used by the rest of the script.
logger = logging.getLogger(__name__)
# Page-level settings: browser tab title/icon, wide layout, sidebar open by default.
page_settings = {
    "page_title": "PDF處理與Gemini翻譯工具",
    "page_icon": "📄",
    "layout": "wide",
    "initial_sidebar_state": "expanded",
}
st.set_page_config(**page_settings)

# Heading followed by a one-line description of what the app does.
st.title("📄 PDF處理與Gemini翻譯工具")
st.markdown("上傳PDF檔案,選擇要處理的頁面,讓Gemini解釋內容並翻譯成繁體中文。")
# Sidebar - settings area: API key, PDF upload, page selection, advanced options.
with st.sidebar:
    st.header("設定")

    # The API key is typed in by the user; no key is hardcoded in the source.
    api_key = st.text_input(
        "Gemini API金鑰",
        value="",
        type="password",
    )

    # Upload PDF file
    uploaded_file = st.file_uploader("上傳PDF檔案", type=["pdf"])

    # Bound up front so the main area can always reference it, even when
    # loading the PDF fails or no slider is rendered.
    page_to_process = 1

    # Processing options block
    with st.expander("處理選項", expanded=True):
        # Initialize session_state
        if 'total_pages' not in st.session_state:
            st.session_state.total_pages = 0
        if 'page_content' not in st.session_state:
            st.session_state.page_content = {}

        if uploaded_file is not None:
            tmp_path = None
            try:
                pdf_bytes = uploaded_file.getvalue()
                # Identify the upload so the cached text is rebuilt when the
                # user swaps in a different PDF instead of showing stale pages.
                file_signature = (
                    uploaded_file.name,
                    len(pdf_bytes),
                    hash(pdf_bytes),
                )
                if st.session_state.get("loaded_file_signature") != file_signature:
                    # PyPDF2 reads from a temporary copy of the upload.
                    with tempfile.NamedTemporaryFile(delete=False, suffix='.pdf') as tmp_file:
                        tmp_path = tmp_file.name
                        tmp_file.write(pdf_bytes)
                    with st.spinner("正在加載PDF..."):
                        pdf_reader = PyPDF2.PdfReader(tmp_path)
                        # Pages are keyed from 1 to match the slider values.
                        loaded_pages = {
                            page_number: page.extract_text() or ""
                            for page_number, page in enumerate(pdf_reader.pages, start=1)
                        }
                    st.session_state.page_content = loaded_pages
                    st.session_state.total_pages = len(loaded_pages)
                    st.session_state.loaded_file_signature = file_signature
            except Exception as e:
                # Drop any cached pages so a failed load never shows the
                # previous file's text.
                st.session_state.page_content = {}
                st.session_state.total_pages = 0
                st.session_state.pop("loaded_file_signature", None)
                logger.error(f"無法讀取PDF: {str(e)}")
                st.error(f"無法讀取PDF: {str(e)}")
            else:
                total_pages = st.session_state.total_pages
                # A slider needs a real range; with 0 or 1 pages there is
                # nothing to choose (and min == max / min > max is rejected
                # by at least some Streamlit versions), so keep page 1.
                if total_pages > 1:
                    page_to_process = st.slider(
                        "選擇要處理的頁面",
                        min_value=1,
                        max_value=total_pages,
                        value=1,
                    )
                st.info(f"PDF共有 {total_pages} 頁")
            finally:
                # Always remove the temporary copy, including on parse errors.
                if tmp_path is not None and os.path.exists(tmp_path):
                    os.unlink(tmp_path)
        else:
            # No file: forget the previous upload so a later upload starts clean.
            st.session_state.page_content = {}
            st.session_state.total_pages = 0
            st.session_state.pop("loaded_file_signature", None)
            st.info("請先上傳PDF檔案")

    # Advanced options
    with st.expander("進階選項"):
        # Instruction prepended to the page text when asking Gemini to explain it
        instruction = st.text_area(
            "給Gemini的指示詞",
            value="請詳細解釋以下內容的主要要點和重要信息",
            height=100,
        )
        # Output filename for the CSV export
        output_filename = st.text_input(
            "輸出CSV檔名",
            value="gemini_translated_results.csv",
        )
# Main function definitions
def setup_gemini_api(api_key, model_name="gemini-1.5-flash"):
    """Configure the Gemini client and return a generative model.

    Args:
        api_key: Gemini API key entered by the user. Surrounding whitespace
            (common when the key is pasted) is stripped before use.
        model_name: Name of the Gemini model to instantiate. Defaults to the
            model this app has always used.

    Returns:
        A ``genai.GenerativeModel`` instance, or ``None`` if configuration
        failed (the error is logged and shown in the UI).
    """
    try:
        genai.configure(api_key=(api_key or "").strip())
        return genai.GenerativeModel(model_name)
    except Exception as e:
        # Keep the traceback in the log; show only the short message in the UI.
        logger.exception("Gemini API設置失敗: %s", e)
        st.error(f"API設置失敗: {str(e)}")
        return None
def process_with_gemini(model, text, instruction):
    """Ask Gemini to process ``text`` according to ``instruction``.

    Args:
        model: Object exposing ``generate_content(prompt)`` whose result has
            a ``text`` attribute.
        text: Page text to process.
        instruction: Instruction placed in front of the text in the prompt.

    Returns:
        The stripped response text, or a string starting with ``處理失敗:``
        when there is no text to process or the call raises.
    """
    # Pages without extractable text (e.g. scanned images) would otherwise
    # send the bare instruction to the API with no content to work on.
    if not text or not text.strip():
        return "處理失敗: 頁面沒有可供處理的文字內容"
    try:
        prompt = f"{instruction}:\n\n{text}"
        response = model.generate_content(prompt)
        return response.text.strip()
    except Exception as e:
        logger.error("Gemini處理失敗: %s", e)
        return f"處理失敗: {str(e)}"
def translate_with_gemini(model, text):
    """Translate ``text`` into Traditional Chinese using Gemini.

    Args:
        model: Object exposing ``generate_content(prompt)`` whose result has
            a ``text`` attribute.
        text: Text to translate.

    Returns:
        The stripped translation, an empty string when there is nothing to
        translate, or a string starting with ``翻譯失敗:`` when the call raises.
    """
    # Nothing to translate: skip the API call instead of sending an empty prompt.
    if not text or not text.strip():
        return ""
    try:
        prompt = f"""
請將以下文本翻譯成繁體中文,保持專業和準確性:
{text}
只需要返回翻譯後的文本,不要加入其他解釋或備註。
"""
        response = model.generate_content(prompt)
        return response.text.strip()
    except Exception as e:
        logger.error("Gemini翻譯失敗: %s", e)
        return f"翻譯失敗: {str(e)}"
def get_csv_download_link(df, filename="data.csv"):
    """Build an HTML anchor that downloads ``df`` as a CSV file.

    Args:
        df: DataFrame to serialise (written without the index).
        filename: Suggested download filename. It is HTML-escaped because the
            returned markup is rendered with ``unsafe_allow_html=True``.

    Returns:
        An ``<a>`` element whose ``href`` is a base64 ``data:`` URI holding
        the CSV encoded as UTF-8 with a BOM.
    """
    from html import escape

    # utf-8-sig adds a BOM so spreadsheet tools detect UTF-8 and render
    # Chinese text correctly.
    csv_bytes = df.to_csv(index=False).encode("utf-8-sig")
    b64 = base64.b64encode(csv_bytes).decode("ascii")
    safe_filename = escape(str(filename), quote=True)
    href = (
        f'<a href="data:text/csv;charset=utf-8;base64,{b64}" '
        f'download="{safe_filename}">下載 CSV 檔案</a>'
    )
    return href
# Main content area
if uploaded_file is not None:
    # Display page content preview
    st.header("頁面內容預覽")

    # Look up the selected page once; None means it was never loaded.
    page_text = st.session_state.page_content.get(page_to_process)
    if page_text is not None:
        st.text_area(
            f"第 {page_to_process} 頁內容",
            value=page_text,
            height=150,
            disabled=True,
        )
    else:
        st.warning("無法獲取選定頁面的內容")

    # Process button
    process_button = st.button("處理並翻譯", type="primary", use_container_width=True)

    if process_button:
        if not api_key:
            st.error("請輸入Gemini API金鑰!")
        elif page_text is None:
            # Without this guard the lookup below would raise KeyError.
            st.error("無法獲取選定頁面的內容")
        else:
            # Separate placeholders so the status text and the progress bar
            # do not overwrite each other.
            status_placeholder = st.empty()
            bar_placeholder = st.empty()

            explanation = None
            translation = None
            with st.spinner("正在處理中..."):
                progress_bar = bar_placeholder.progress(0)
                # Set up API (on failure the helper reports the error itself)
                model = setup_gemini_api(api_key)
                if model is not None:
                    progress_bar.progress(20)

                    # Process with Gemini
                    status_placeholder.text("正在使用Gemini解釋內容...")
                    explanation = process_with_gemini(model, page_text, instruction)
                    progress_bar.progress(60)

                    # The helper signals failure via this message prefix;
                    # do not translate an error message.
                    if not explanation.startswith("處理失敗:"):
                        status_placeholder.text("正在翻譯成繁體中文...")
                        translation = translate_with_gemini(model, explanation)
                        progress_bar.progress(90)

            status_placeholder.empty()
            bar_placeholder.empty()

            if explanation is not None and translation is None:
                # Explanation step failed: show the failure instead of success.
                st.error(explanation)
            elif translation is not None:
                # Create results DataFrame
                results_data = {
                    "時間戳記": [datetime.now().isoformat()],
                    "原始內容": [page_text[:5000] + "..." if len(page_text) > 5000 else page_text],
                    "Gemini解釋": [explanation],
                    "繁體中文翻譯": [translation],
                }
                results_df = pd.DataFrame(results_data)

                # The filename is user input: keep only its final component so
                # it cannot escape /tmp via "../" or an absolute path.
                safe_filename = os.path.basename(output_filename.strip())
                if safe_filename in ("", ".", ".."):
                    safe_filename = "gemini_translated_results.csv"
                csv_path = os.path.join("/tmp", safe_filename)
                try:
                    results_df.to_csv(csv_path, index=False, encoding="utf-8-sig")
                    logger.info(f"CSV saved to {csv_path}")
                except Exception as e:
                    logger.error(f"Failed to save CSV: {e}")
                    st.error(f"無法保存CSV: {str(e)}")

                # Display results
                if translation.startswith("翻譯失敗:"):
                    # Explanation is still useful; flag the translation failure.
                    st.warning(translation)
                else:
                    st.success("處理完成!")

                # Create tabs to display results
                tab1, tab2, tab3 = st.tabs(["Gemini解釋", "繁體中文翻譯", "CSV資料"])
                with tab1:
                    st.subheader("Gemini解釋結果")
                    st.write(explanation)
                with tab2:
                    st.subheader("繁體中文翻譯")
                    st.write(translation)
                with tab3:
                    st.subheader("CSV資料預覽")
                    st.dataframe(results_df)
                    # The link embeds the CSV itself, so it works even if the
                    # on-disk save above failed.
                    st.markdown(
                        get_csv_download_link(results_df, safe_filename),
                        unsafe_allow_html=True,
                    )
                    st.info(f"CSV檔案已準備好下載。檔名: {safe_filename}")
else:
    # Content to display when no file is uploaded
    st.info("👈 請從側邊欄上傳PDF檔案開始")
    # Display usage instructions
    with st.expander("使用說明", expanded=True):
        st.markdown("""
### 如何使用這個工具:
1. **上傳PDF檔案** - 從側邊欄選擇並上傳PDF檔案
2. **選擇頁面** - 使用滑桿選擇要處理的頁面
3. **設定API金鑰** - 輸入您的Gemini API金鑰
4. **自訂指示詞** - 可選擇修改給Gemini的指示詞
5. **處理與翻譯** - 點擊"處理並翻譯"按鈕
6. **查看結果** - 在選項卡中查看Gemini的解釋和繁體中文翻譯
7. **下載結果** - 下載CSV格式的結果檔案
### 功能特點:
- 逐頁預覽PDF內容
- 使用Gemini AI解釋文本
- 自動翻譯成繁體中文
- 結果以CSV格式儲存
""")
# Footer: a horizontal rule followed by the credit line.
for footer_markdown in (
    "---",
    "📄 PDF處理與Gemini翻譯工具 | 由Streamlit和Google Gemini AI提供技術支持",
):
    st.markdown(footer_markdown)