meesamraza committed on
Commit
e5f5057
·
verified ·
1 Parent(s): cd2a69a

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +70 -36
app.py CHANGED
@@ -10,16 +10,22 @@ from langchain.memory import ConversationBufferMemory
10
  from langchain.chains import ConversationalRetrievalChain
11
  from langchain_groq import ChatGroq
12
 
 
13
  # Load environment variables
 
14
  load_dotenv()
15
 
16
- # Set up logging
 
 
17
  logging.basicConfig(
18
  level=logging.INFO,
19
  format='%(asctime)s - %(levelname)s - %(message)s'
20
  )
21
 
22
- # Function to extract text from PDF files
 
 
23
  def get_pdf_text(pdf_docs):
24
  text = ""
25
  for pdf in pdf_docs:
@@ -30,7 +36,9 @@ def get_pdf_text(pdf_docs):
30
  text += extracted_text + "\n"
31
  return text
32
 
33
- # Function to split the extracted text into chunks
 
 
34
  def get_text_chunks(text):
35
  text_splitter = CharacterTextSplitter(
36
  separator="\n",
@@ -40,70 +48,96 @@ def get_text_chunks(text):
40
  )
41
  return text_splitter.split_text(text)
42
 
43
- # Function to create a FAISS vectorstore
 
 
44
  def get_vectorstore(text_chunks):
45
  embeddings = HuggingFaceEmbeddings(model_name="sentence-transformers/all-MiniLM-L6-v2")
46
- vectorstore = FAISS.from_texts(texts=text_chunks, embedding=embeddings)
47
- return vectorstore
48
 
49
- # Function to set up the conversational retrieval chain
 
 
50
  def get_conversation_chain(vectorstore):
51
  try:
52
  llm = ChatGroq(model="llama-3.3-70b-versatile", temperature=0.5)
53
  memory = ConversationBufferMemory(memory_key='chat_history', return_messages=True)
54
-
55
  conversation_chain = ConversationalRetrievalChain.from_llm(
56
  llm=llm,
57
  retriever=vectorstore.as_retriever(),
58
  memory=memory
59
  )
60
-
61
- logging.info("Conversation chain created successfully.")
62
  return conversation_chain
63
  except Exception as e:
64
  logging.error(f"Error creating conversation chain: {e}")
65
  st.error("An error occurred while setting up the conversation chain.")
66
 
 
67
  # Handle user input
 
68
  def handle_userinput(user_question):
69
  if st.session_state.conversation is not None:
70
- response = st.session_state.conversation({'question': user_question})
71
- st.session_state.chat_history = response['chat_history']
 
72
 
 
73
  for i, message in enumerate(st.session_state.chat_history):
74
- role = "User" if i % 2 == 0 else "Bot"
75
- st.write(f"*{role}:* {message.content}")
 
 
76
  else:
77
- st.warning("Please process the documents first.")
78
 
79
- # Main function to run the Streamlit app
 
 
80
  def main():
81
- load_dotenv()
82
- st.set_page_config(page_title="Chat with PDFs", page_icon=":books:")
 
83
 
 
84
  if "conversation" not in st.session_state:
85
  st.session_state.conversation = None
86
  if "chat_history" not in st.session_state:
87
  st.session_state.chat_history = None
88
 
89
- st.header("Chat with PDFs :books:")
90
- user_question = st.text_input("Ask a question about your documents:")
91
- if user_question:
92
- handle_userinput(user_question)
93
-
94
  with st.sidebar:
95
- st.subheader("Your documents")
96
- pdf_docs = st.file_uploader("Upload PDFs and click 'Process'", accept_multiple_files=True, type=["pdf"])
97
- if st.button("Process"):
98
- with st.spinner("Processing..."):
99
- raw_text = get_pdf_text(pdf_docs)
100
- if raw_text.strip():
101
- text_chunks = get_text_chunks(raw_text)
102
- vectorstore = get_vectorstore(text_chunks)
103
- st.session_state.conversation = get_conversation_chain(vectorstore)
104
- st.success("Processing complete! You can now ask questions.")
105
- else:
106
- st.error("No valid text extracted from the PDFs.")
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
107
 
108
  if __name__ == '__main__':
109
- main()
 
10
  from langchain.chains import ConversationalRetrievalChain
11
  from langchain_groq import ChatGroq
12
 
13
+ # --------------------------
14
  # Load environment variables
15
+ # --------------------------
16
  load_dotenv()
17
 
18
+ # --------------------------
19
+ # Logging configuration
20
+ # --------------------------
21
  logging.basicConfig(
22
  level=logging.INFO,
23
  format='%(asctime)s - %(levelname)s - %(message)s'
24
  )
25
 
26
+ # --------------------------
27
+ # PDF text extraction
28
+ # --------------------------
29
  def get_pdf_text(pdf_docs):
30
  text = ""
31
  for pdf in pdf_docs:
 
36
  text += extracted_text + "\n"
37
  return text
38
 
39
+ # --------------------------
40
+ # Text chunking
41
+ # --------------------------
42
  def get_text_chunks(text):
43
  text_splitter = CharacterTextSplitter(
44
  separator="\n",
 
48
  )
49
  return text_splitter.split_text(text)
50
 
51
+ # --------------------------
52
+ # FAISS VectorStore creation
53
+ # --------------------------
54
def get_vectorstore(text_chunks):
    """Build an in-memory FAISS index over the given text chunks.

    Embeddings are produced by the sentence-transformers MiniLM model;
    the returned store supports similarity search via ``as_retriever()``.
    """
    embedder = HuggingFaceEmbeddings(model_name="sentence-transformers/all-MiniLM-L6-v2")
    return FAISS.from_texts(texts=text_chunks, embedding=embedder)
 
57
 
58
+ # --------------------------
59
+ # Conversation chain
60
+ # --------------------------
61
def get_conversation_chain(vectorstore):
    """Wire up a ConversationalRetrievalChain over *vectorstore*.

    Uses the Groq-hosted LLaMA 3.3 model as the LLM and a buffer memory
    keyed to 'chat_history' (the key ConversationalRetrievalChain expects).

    Returns:
        The ready chain, or None if construction failed (the error is
        logged with its traceback and surfaced to the Streamlit UI).
    """
    try:
        llm = ChatGroq(model="llama-3.3-70b-versatile", temperature=0.5)
        memory = ConversationBufferMemory(memory_key='chat_history', return_messages=True)
        conversation_chain = ConversationalRetrievalChain.from_llm(
            llm=llm,
            retriever=vectorstore.as_retriever(),
            memory=memory
        )
        logging.info("βœ… Conversation chain created successfully.")
        return conversation_chain
    except Exception as e:
        # Broad catch is deliberate: this is a UI boundary. Use
        # logging.exception so the full traceback is recorded (the old
        # logging.error f-string dropped it), and return None explicitly
        # so callers see the failure instead of an implicit fall-through.
        logging.exception("Error creating conversation chain: %s", e)
        st.error("An error occurred while setting up the conversation chain.")
        return None
75
 
76
+ # --------------------------
77
  # Handle user input
78
+ # --------------------------
79
def handle_userinput(user_question):
    """Run *user_question* through the active chain and render the chat.

    Requires that documents were processed first (a conversation chain in
    session state); otherwise a warning is shown and nothing runs.
    """
    if st.session_state.conversation is None:
        st.warning("⚠ Please process the documents first.")
        return

    with st.spinner("πŸ€– Thinking..."):
        result = st.session_state.conversation({'question': user_question})
        st.session_state.chat_history = result['chat_history']

    # Alternating transcript: even indices are the human turns.
    for turn, message in enumerate(st.session_state.chat_history):
        prefix = "πŸ§‘ **You:**" if turn % 2 == 0 else "πŸ€– **Bot:**"
        st.markdown(f"{prefix} {message.content}")
93
 
94
+ # --------------------------
95
+ # Main Streamlit App
96
+ # --------------------------
97
def main():
    """Entry point: render the Streamlit PDF-chatbot UI."""
    st.set_page_config(page_title="AI PDF Chatbot", page_icon="πŸ“š", layout="wide")
    st.title("πŸ“š AI-Powered PDF Chatbot")
    st.markdown("Chat with your documents using **LLaMA 3.3** and **Groq AI**. Perfect for research, learning, and exhibitions!")

    # Ensure both session keys exist before any widget reads them.
    st.session_state.setdefault("conversation", None)
    st.session_state.setdefault("chat_history", None)

    # Sidebar: upload PDFs and build the retrieval chain.
    with st.sidebar:
        st.header("πŸ“‚ Upload & Process")
        uploads = st.file_uploader("Upload PDFs", accept_multiple_files=True, type=["pdf"])
        if st.button("πŸš€ Process Documents"):
            if not uploads:
                st.warning("Please upload at least one PDF.")
            else:
                with st.spinner("πŸ“– Reading & Processing..."):
                    extracted = get_pdf_text(uploads)
                    if not extracted.strip():
                        st.error("No valid text found in PDFs.")
                    else:
                        chunks = get_text_chunks(extracted)
                        store = get_vectorstore(chunks)
                        st.session_state.conversation = get_conversation_chain(store)
                        st.success("βœ… Documents processed! You can now ask questions.")

    # Main panel: question entry.
    st.subheader("πŸ’¬ Ask a Question")
    question = st.text_input("Type your question here...")
    if st.button("Submit Question"):
        if question.strip():
            handle_userinput(question)
        else:
            st.warning("Please enter a question before submitting.")

    # Replay of the running transcript, oldest first.
    if st.session_state.chat_history:
        st.subheader("πŸ“ Chat History")
        for idx, message in enumerate(st.session_state.chat_history):
            speaker = "User" if idx % 2 == 0 else "Bot"
            st.write(f"**{speaker}:** {message.content}")
141
 
142
# Run the app only when executed as a script, not when imported.
if __name__ == "__main__":
    main()