"""Streamlit app for asking questions about uploaded PDF files with Gemini.

Uploaded PDFs are converted to text, split into chunks and embedded into a
local FAISS index; questions are answered from the chunks most similar to the
question.  Rough cost estimates are displayed next to the results.
"""

import os

import streamlit as st
from dotenv import load_dotenv
from langchain.chains.question_answering import load_qa_chain
from langchain.prompts import PromptTemplate
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain.vectorstores import FAISS
from langchain_google_genai import GoogleGenerativeAIEmbeddings, ChatGoogleGenerativeAI
from PyPDF2 import PdfReader

# Load API key from a local .env file into the process environment.
load_dotenv()
# NOTE(review): genai_key is not referenced by the visible code; the Google
# clients are presumably picking GOOGLE_API_KEY up from the environment —
# confirm before removing.
genai_key = os.getenv("GOOGLE_API_KEY")

# Constants for cost calculation
EMBEDDING_COST_PER_1000_TOKENS = 0.0002  # USD
LM_COST_PER_1000_TOKENS = 0.0001  # USD


def get_pdf_text(pdf_docs):
    """Extract and concatenate the text of every page of the given PDFs.

    Args:
        pdf_docs: Iterable of PDF paths or file-like objects accepted by
            ``PdfReader``.  ``None`` or an empty iterable yields ``""``.

    Returns:
        The text of all pages, in document and page order, as one string.
    """
    page_texts = []
    for pdf in pdf_docs or ():
        # ``or ""`` keeps the join safe if a page yields no text at all
        # (for example a page that only contains a scanned image).
        page_texts.extend(
            page.extract_text() or "" for page in PdfReader(pdf).pages
        )
    # Join once instead of growing a string with ``+=`` on every page.
    return "".join(page_texts)


def get_text_chunks(text, chunk_size=10000, chunk_overlap=1000):
    """Split the extracted text into overlapping chunks for embedding.

    Args:
        text: Full text to split.
        chunk_size: Maximum size of a chunk, passed to the splitter.
        chunk_overlap: Overlap between consecutive chunks, passed to the
            splitter.

    Returns:
        The list of chunks produced by ``RecursiveCharacterTextSplitter``.
    """
    text_splitter = RecursiveCharacterTextSplitter(
        chunk_size=chunk_size,
        chunk_overlap=chunk_overlap,
    )
    return text_splitter.split_text(text)


def calculate_cost(tokens, rate_per_1000):
    """Return the cost of *tokens* tokens at *rate_per_1000* per 1000 tokens."""
    return (tokens / 1000) * rate_per_1000
# Shared by indexing and querying so the two code paths cannot drift apart.
FAISS_INDEX_PATH = "faiss_index"
EMBEDDING_MODEL = "models/embedding-001"


def estimate_tokens(text):
    """Return a rough token count for *text*.

    The count is the number of whitespace-separated words, which only
    approximates what a real tokenizer would report.
    """
    return len(text.split())


def get_vector_store(text_chunks):
    """Embed the chunks, persist them in a local FAISS index, return the cost.

    Args:
        text_chunks: Non-empty list of text chunks to embed.

    Returns:
        Estimated embedding cost in USD, based on a word-count token estimate.

    Raises:
        ValueError: If *text_chunks* is empty; there is nothing to index.
    """
    if not text_chunks:
        # Fail early with a clear message instead of an obscure error from
        # inside the vector-store construction.
        raise ValueError("text_chunks is empty; nothing to embed")

    embeddings = GoogleGenerativeAIEmbeddings(model=EMBEDDING_MODEL)
    vector_store = FAISS.from_texts(text_chunks, embedding=embeddings)
    vector_store.save_local(FAISS_INDEX_PATH)

    # Calculate embedding cost
    total_tokens = sum(estimate_tokens(chunk) for chunk in text_chunks)
    return calculate_cost(total_tokens, EMBEDDING_COST_PER_1000_TOKENS)


def get_conversational_chain():
    """Build the "stuff" question-answering chain around the Gemini chat model.

    Returns:
        The chain created by ``load_qa_chain``; it expects the inputs
        ``input_documents`` and ``question``.
    """
    prompt_template = """
    Answer the question as detailed as possible from the provided context.
    If the answer is not in the context, respond with "answer is not available in the context".\n\n
    Context:\n {context}?\n
    Question:\n {question}\n
    Answer:
    """
    model = ChatGoogleGenerativeAI(model="gemini-1.5-flash", temperature=0.3)
    prompt = PromptTemplate(
        template=prompt_template,
        input_variables=["context", "question"],
    )
    return load_qa_chain(model, chain_type="stuff", prompt=prompt)


def process_user_question(user_question):
    """Answer *user_question* from the saved index and display cost estimates.

    Writes the answer and the estimated embedding, language-model and total
    costs to the Streamlit page.  If no index has been saved yet, a warning is
    shown instead and nothing else happens.

    Args:
        user_question: The question typed by the user.
    """
    if not os.path.exists(FAISS_INDEX_PATH):
        # Asking before any PDF was processed would otherwise crash while
        # loading the missing index.
        st.warning(
            "No processed PDFs found. Upload your PDF files and click "
            "'Submit & Process' first."
        )
        return

    embeddings = GoogleGenerativeAIEmbeddings(model=EMBEDDING_MODEL)
    # NOTE(security): loading the index unpickles data from disk.  That is only
    # acceptable while the index is written exclusively by this application;
    # never point this at an index obtained from an untrusted source.
    vector_store = FAISS.load_local(
        FAISS_INDEX_PATH,
        embeddings,
        allow_dangerous_deserialization=True,
    )
    docs = vector_store.similarity_search(user_question)

    chain = get_conversational_chain()
    response = chain(
        {"input_documents": docs, "question": user_question},
        return_only_outputs=True,
    )
    answer = response["output_text"]

    # Token estimation
    question_tokens = estimate_tokens(user_question)
    context_tokens = sum(estimate_tokens(doc.page_content) for doc in docs)
    output_tokens = estimate_tokens(answer)

    # Cost calculation.  At query time only the question is embedded; the
    # retrieved documents were already embedded when the index was built, so
    # they count towards the language-model input, not the embedding cost.
    retrieval_cost = calculate_cost(question_tokens, EMBEDDING_COST_PER_1000_TOKENS)
    inference_cost = calculate_cost(
        context_tokens + question_tokens + output_tokens,
        LM_COST_PER_1000_TOKENS,
    )
    total_cost = retrieval_cost + inference_cost

    # Output the results
    st.write("Response:", answer)
    st.write(f"Embedding Cost: ${retrieval_cost:.4f}")
    st.write(f"Language Model Cost: ${inference_cost:.4f}")
    st.write(f"Total Query Cost: ${total_cost:.4f}")


def _process_uploads(pdf_docs):
    """Index the uploaded PDFs and report the outcome in the Streamlit UI."""
    if not pdf_docs:
        st.warning("Please upload at least one PDF file before processing.")
        return

    with st.spinner("Processing..."):
        raw_text = get_pdf_text(pdf_docs)
        text_chunks = get_text_chunks(raw_text)
        if not text_chunks:
            st.error("No extractable text was found in the uploaded PDF files.")
            return
        embedding_cost = get_vector_store(text_chunks)
        st.success(f"Processing Done! Embedding Cost: ${embedding_cost:.4f}")


def main():
    """Streamlit app entry point: question box plus a sidebar for uploads."""
    st.set_page_config("Chat PDF Cost Calculator")
    st.header("Chat with PDF using Gemini 💁 (Cost Included)")

    user_question = st.text_input("Ask a Question from the PDF Files")
    if user_question:
        process_user_question(user_question)

    with st.sidebar:
        st.title("Menu:")
        pdf_docs = st.file_uploader(
            "Upload your PDF Files and Click Submit & Process",
            accept_multiple_files=True,
        )
        if st.button("Submit & Process"):
            _process_uploads(pdf_docs)


if __name__ == "__main__":
    main()