stevafernandes committed on
Commit
1f9fe23
·
verified ·
1 Parent(s): 85995b1

Create appy.py

Browse files
Files changed (1) hide show
  1. appy.py +110 -0
appy.py ADDED
@@ -0,0 +1,110 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import streamlit as st
2
+ from PyPDF2 import PdfReader
3
+ from io import BytesIO
4
+ import os
5
+
6
+ from langchain.text_splitter import RecursiveCharacterTextSplitter
7
+ from langchain_google_genai import GoogleGenerativeAIEmbeddings
8
+ from langchain_community.vectorstores import FAISS
9
+ from langchain_google_genai import ChatGoogleGenerativeAI
10
+ from langchain.chains.question_answering import load_qa_chain
11
+ from langchain.prompts import PromptTemplate
12
+
13
# --- Get API key from environment variable (set in Hugging Face Secrets or .env file) ---
# Falls back to an empty string; main() prompts the user for a key when this is unset.
GOOGLE_API_KEY = os.environ.get("GOOGLE_API_KEY", "")
15
+
16
def get_pdf_text(pdf_docs):
    """Return the concatenated extracted text of every page of every uploaded PDF.

    Pages where ``extract_text()`` yields ``None`` or an empty string are skipped.
    ``pdf_docs`` is an iterable of uploaded file objects exposing ``.read()``.
    """
    pieces = []
    for uploaded in pdf_docs:
        reader = PdfReader(BytesIO(uploaded.read()))
        for page in reader.pages:
            extracted = page.extract_text()
            if extracted:
                pieces.append(extracted)
    # Single join instead of repeated += — same result, linear-time concatenation.
    return "".join(pieces)
25
+
26
def get_text_chunks(text):
    """Split *text* into overlapping chunks for embedding (10k chars, 1k overlap)."""
    splitter = RecursiveCharacterTextSplitter(chunk_size=10000, chunk_overlap=1000)
    chunks = splitter.split_text(text)
    return chunks
29
+
30
def get_vector_store(text_chunks, api_key):
    """Embed *text_chunks* with Gemini embeddings and persist a FAISS index.

    The index is written to ``/tmp/faiss_index``; ``user_input()`` reloads it
    from that path. Returns None.
    NOTE(review): /tmp is host-shared — concurrent users of one instance would
    overwrite each other's index; confirm this deployment is single-user.
    """
    embedder = GoogleGenerativeAIEmbeddings(
        model="models/embedding-001", google_api_key=api_key
    )
    index = FAISS.from_texts(text_chunks, embedding=embedder)
    index.save_local("/tmp/faiss_index")
34
+
35
def get_conversational_chain(api_key):
    """Build a 'stuff'-type QA chain over Gemini 2.0 Flash.

    The prompt confines the model to the retrieved PDF context and instructs
    it to reply "I don't know." when the answer is absent. temperature=0 keeps
    answers deterministic.
    """
    prompt_template = """
    You are a helpful assistant that only answers based on the context provided from the PDF documents.
    Do not use any external knowledge or assumptions. If the answer is not found in the context below, reply with "I don't know."
    Context:
    {context}
    Question:
    {question}
    Answer:
    """
    qa_prompt = PromptTemplate(
        template=prompt_template, input_variables=["context", "question"]
    )
    llm = ChatGoogleGenerativeAI(
        model="gemini-2.0-flash", temperature=0, google_api_key=api_key
    )
    return load_qa_chain(llm, chain_type="stuff", prompt=qa_prompt)
49
+
50
def user_input(user_question, api_key):
    """Answer *user_question* from the persisted FAISS index and render the reply.

    Loads the index written by ``get_vector_store()``, retrieves the most
    similar chunks, runs them through the QA chain, and writes the answer
    to the Streamlit page.
    """
    embedder = GoogleGenerativeAIEmbeddings(
        model="models/embedding-001", google_api_key=api_key
    )
    # allow_dangerous_deserialization: acceptable here because the index file
    # was produced by this same app, not by untrusted input.
    index = FAISS.load_local(
        "/tmp/faiss_index", embedder, allow_dangerous_deserialization=True
    )
    relevant_docs = index.similarity_search(user_question)
    qa_chain = get_conversational_chain(api_key)
    response = qa_chain(
        {"input_documents": relevant_docs, "question": user_question},
        return_only_outputs=True,
    )
    st.write("Reply: ", response["output_text"])
57
+
58
def _rerun():
    """Trigger a Streamlit script rerun across library versions.

    ``st.experimental_rerun`` was deprecated and later removed in favor of
    ``st.rerun``; calling the removed name raises AttributeError on current
    Streamlit releases, so prefer the modern API when it exists.
    """
    if hasattr(st, "rerun"):
        st.rerun()
    else:
        st.experimental_rerun()


def main():
    """Streamlit entry point: collect an API key, index PDFs, then answer questions.

    Flow is gated by two session-state flags: ``api_entered`` (a usable Gemini
    key is stored) and ``pdf_processed`` (a FAISS index has been built).
    """
    st.set_page_config(page_title="Chat PDF")
    st.header("Retrieval-Augmented Generation - Gemini 2.0")
    st.markdown("---")

    # STEP 1: Use API key from env or ask user
    if "api_entered" not in st.session_state:
        st.session_state["api_entered"] = False
    if "pdf_processed" not in st.session_state:
        st.session_state["pdf_processed"] = False

    api_key = GOOGLE_API_KEY

    if not st.session_state["api_entered"]:
        if not api_key:
            user_api_key = st.text_input("Enter your Gemini API key", type="password")
            if st.button("Continue") and user_api_key:
                st.session_state["user_api_key"] = user_api_key
                st.session_state["api_entered"] = True
                _rerun()
            # Halt until a key has been entered.
            st.stop()
        else:
            # Env-provided key: record it and restart the script on the next step.
            st.session_state["user_api_key"] = api_key
            st.session_state["api_entered"] = True
            _rerun()

    api_key = st.session_state.get("user_api_key", "")

    # STEP 2: Upload PDF(s)
    if not st.session_state["pdf_processed"]:
        st.subheader("Step 2: Upload your PDF file(s)")
        pdf_docs = st.file_uploader("Upload PDF files", accept_multiple_files=True, type=['pdf'])
        if st.button("Submit & Process PDFs"):
            if pdf_docs:
                with st.spinner("Processing..."):
                    raw_text = get_pdf_text(pdf_docs)
                    text_chunks = get_text_chunks(raw_text)
                    get_vector_store(text_chunks, api_key)
                st.session_state["pdf_processed"] = True
                st.success("PDFs processed! You can now ask questions.")
                _rerun()
            else:
                st.error("Please upload at least one PDF file.")
        # Don't show the question box until an index exists.
        st.stop()

    # STEP 3: Ask questions
    st.subheader("Step 3: Ask a question about your PDFs")
    user_question = st.text_input("Ask a question")
    if user_question:
        user_input(user_question, api_key)


if __name__ == "__main__":
    main()