from dotenv import load_dotenv import os import streamlit as st from PyPDF2 import PdfReader from langchain.text_splitter import CharacterTextSplitter from langchain_google_genai import GoogleGenerativeAIEmbeddings, ChatGoogleGenerativeAI from langchain_community.vectorstores import FAISS from langchain.chains.question_answering import load_qa_chain from langchain.prompts import PromptTemplate import tempfile # Load environment variables load_dotenv() # Set Gemini API key gemini_api_key = "AIzaSyCPNdM86kS3rR91mp7BxZaMolvQ0PqQiBY" os.environ["GOOGLE_API_KEY"] = gemini_api_key def get_pdf_text(pdf_files): """從多個PDF文件中提取文字""" raw_text = "" if pdf_files is None: return raw_text # 處理單個文件和多個文件 if not isinstance(pdf_files, list): pdf_files = [pdf_files] for pdf in pdf_files: try: # 檢查是否為上傳的文件物件或文件路徑 if hasattr(pdf, 'read'): # 這是來自Streamlit的上傳文件物件 with tempfile.NamedTemporaryFile(delete=False, suffix='.pdf') as tmp_file: tmp_file.write(pdf.read()) tmp_file.flush() pdf_reader = PdfReader(tmp_file.name) for page in pdf_reader.pages: text = page.extract_text() if text: raw_text += text + "\n" # 清理臨時文件 os.unlink(tmp_file.name) else: # 這是文件路徑 pdf_reader = PdfReader(pdf) for page in pdf_reader.pages: text = page.extract_text() if text: raw_text += text + "\n" except Exception as e: st.error(f"讀取PDF時發生錯誤:{str(e)}") continue return raw_text def get_text_chunks(text): """將文字分割成區塊進行處理""" text_splitter = CharacterTextSplitter( separator="\n", chunk_size=10000, chunk_overlap=1000, length_function=len ) chunks = text_splitter.split_text(text) return chunks def get_vector_store(chunks): """從文字區塊創建並保存FAISS向量存儲""" try: embeddings = GoogleGenerativeAIEmbeddings( model="models/text-embedding-004", # Updated to newer embedding model google_api_key=gemini_api_key ) vector_store = FAISS.from_texts(chunks, embeddings) vector_store.save_local("faiss_index") return True except Exception as e: st.error(f"創建向量存儲時發生錯誤:{str(e)}") return False def get_conversational_chain(): """Create the conversational chain for Q&A with Flash 2.0""" prompt_template = """ Answer the question as detailed as possible from the provided context. Make sure to provide all the details. If you need more details to perfectly answer the question, then ask for more details that you think need to be known. If the answer is not in the provided context, just say "answer is not available in your provided context". Don't provide the wrong answer. Context:\n {context}\n Question: \n{question}\n Answer: """ # Using Flash 2.0 model model = ChatGoogleGenerativeAI( model="gemini-2.0-flash-exp", # Flash 2.0 model google_api_key=gemini_api_key, temperature=0.3, max_tokens=8192, # Flash 2.0 supports larger context top_p=0.8, top_k=40 ) prompt = PromptTemplate( template=prompt_template, input_variables=['context', 'question'] ) chain = load_qa_chain(model, chain_type="stuff", prompt=prompt) return chain def handle_user_input(question): """Handle user questions and provide answers""" try: # Check if vector store exists if not os.path.exists("faiss_index"): st.warning("Please upload and process PDF files first!") return # Load the vector store with updated embedding model embeddings = GoogleGenerativeAIEmbeddings( model="models/text-embedding-004", # Updated to newer embedding model google_api_key=gemini_api_key ) vector_store = FAISS.load_local( "faiss_index", embeddings=embeddings, allow_dangerous_deserialization=True ) # Search for similar documents (increased k for Flash 2.0's better context handling) docs = vector_store.similarity_search(question, k=6) if not docs: st.write("No relevant information found in the uploaded documents.") return # Get the conversational chain and generate response chain = get_conversational_chain() response = chain( { "input_documents": docs, "question": question, }, return_only_outputs=True ) st.write("**Reply (Flash 2.0):**") st.write(response["output_text"]) except Exception as e: st.error(f"Error processing question: {str(e)}") def main(): """Main Streamlit application""" st.set_page_config( page_title="Chat with Multiple PDFs - Flash 2.0", page_icon="⚡", layout="wide" ) st.header("⚡ Chat With Multiple PDFs using Flash 2.0") st.markdown("Upload your PDF files and ask questions about their content using Google's latest Flash 2.0 model!") # Model info badge st.markdown("""