seek007 committed on
Commit
f337b6a
·
verified ·
1 Parent(s): cd95686

Create app.py

Browse files
Files changed (1) hide show
  1. app.py +183 -0
app.py ADDED
@@ -0,0 +1,183 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from langchain.chains import RetrievalQA
2
+ from langchain_qdrant import QdrantVectorStore
3
+ from langchain_community.embeddings import OpenAIEmbeddings
4
+ from langchain_community.chat_models import ChatOpenAI
5
+ from langchain.prompts import PromptTemplate
6
+
7
+ # Import additional libraries for web application and audio processing
8
+ import streamlit as st
9
+ import gradio as gr
10
+ import speech_recognition as sr
11
+ from gtts import gTTS
12
+ from io import BytesIO
13
+ import base64
14
+ from streamlit_chat import message
15
+
16
+ import os
17
+ os.environ['OPENAI_API_KEY'] = os.environ.get("OPENAI_API_KEY")
18
+ Qdrant_API_KEY = os.environ.get("Qdrant_API_KEY")
19
+
20
+
# --- Retrieval back-end -------------------------------------------------
# Embedding model used to vectorize queries; must match the model the
# Qdrant collection was indexed with.
embed_model = OpenAIEmbeddings()

# Qdrant cluster that holds the pre-built constitution index.
qdrant_url = "https://ee4d124d-d295-4df3-ad6b-47fe60d3f80d.europe-west3-0.gcp.cloud.qdrant.io:6333"

# Attach to the existing "Pakistan_Constitution_eng" collection rather than
# re-ingesting documents (the one-off ingestion used
# QdrantVectorStore.from_documents with the same collection name).
vectorstore = QdrantVectorStore.from_existing_collection(
    embedding=embed_model,
    url=qdrant_url,
    api_key=Qdrant_API_KEY,
    collection_name="Pakistan_Constitution_eng",
)

# Deterministic (temperature=0) chat model used to answer questions.
llm = ChatOpenAI(model_name="gpt-4o-mini", temperature=0)
32
+
# Build the prompt template for the QA system.
# NOTE(review): {context} and {question} are filled in by the RetrievalQA
# chain at query time; everything else is fixed instruction text (the model
# is told to answer in Urdu with article/clause citations).
template = """You are a legal assistant providing accurate and concise information based on the Constitution of Pakistan.
You will receive questions in Urdu language and you have to answer in Urdu language.
Use the following pieces of context to answer the question at the end.
Give accurate references to the articles and clauses.
If you don't know the answer, just say that you don't know, don't try to make up an answer.
Provide proper relevant citations or references to the exact articles or clauses in the Constitution.
Keep the answer as concise as possible. Always say "thanks!" in urdu at the end of the answer.
{context}
Question: {question}
Helpful Answer:"""

# Wrap the raw template so the retrieval QA chain knows which variables to fill.
QA_CHAIN_PROMPT = PromptTemplate(input_variables=["context", "question"], template=template)
47
+
# Retrieval-augmented QA: the retriever pulls constitution chunks from Qdrant
# and the LLM answers using the custom Urdu prompt defined above.
retriever = vectorstore.as_retriever()
qa_chain = RetrievalQA.from_chain_type(
    llm=llm,
    retriever=retriever,
    chain_type_kwargs={"prompt": QA_CHAIN_PROMPT},
)
54
+
# Text-to-speech helper used to voice the chatbot's answers.
def speak_urdu(text):
    """Synthesize Urdu speech for *text* and return it as a base64 MP3 string."""
    buffer = BytesIO()                      # in-memory MP3 target
    gTTS(text=text, lang='ur').write_to_fp(buffer)
    # getvalue() returns the full buffer contents without seek/read bookkeeping.
    return base64.b64encode(buffer.getvalue()).decode()
64
+
# Speech-to-text helper for the "Speak Now" button.
def recognize_speech():
    """Capture microphone audio and return recognized Urdu text, or an error string.

    NOTE(review): requires a local microphone; this will not work on a
    headless server deployment — confirm the target environment.
    """
    recognizer = sr.Recognizer()
    with sr.Microphone() as mic:
        st.write("Listening...")            # show the user we are recording
        captured = recognizer.listen(mic)
    try:
        # Google's free speech API, Urdu language model.
        return recognizer.recognize_google(captured, language="ur")
    except sr.UnknownValueError:
        return "Sorry, I couldn't understand your speech."
    except sr.RequestError:
        return "Error: Unable to process your request."
79
+
# Thin wrapper around the QA chain so UI handlers share one entry point.
def invoke_chatbot(user_input):
    """Run the RetrievalQA chain on *user_input* and return the answer text."""
    return qa_chain.invoke(user_input)["result"]
84
+
# Helper function to autoplay audio using HTML
def autoplay_audio(audio_base64):
    # Hidden <audio> element; autoplay starts playback as soon as the markdown
    # is rendered. audio_base64 must be a base64-encoded MP3 (as produced by
    # speak_urdu). unsafe_allow_html is required to inject raw HTML.
    audio_html = f"""
    <audio autoplay="true" controls="false" style="display:none;">
    <source src="data:audio/mp3;base64,{audio_base64}" type="audio/mp3">
    </audio>
    """
    st.markdown(audio_html, unsafe_allow_html=True)  # Render audio in Streamlit
93
+
# Seed the session-state slots the app relies on, first run only.
_SESSION_DEFAULTS = {
    "history": [],           # list of {"user": ..., "bot": ...} chat turns
    "input_text": "",        # current text-box contents
    "voice_input": "",       # last recognized speech
    "audio_playback": None,  # base64 MP3 awaiting autoplay, if any
}
for _slot, _default in _SESSION_DEFAULTS.items():
    if _slot not in st.session_state:
        st.session_state[_slot] = _default
106
+
# Reset handler for the "Clear Chat" button.
def clear_chat():
    """Reset chat history, the typed input box, and any pending audio.

    "voice_input" is not cleared here (matches existing behavior).
    """
    for slot, blank in (("history", []), ("input_text", ""), ("audio_playback", None)):
        st.session_state[slot] = blank
112
+
# ----- Sidebar: developer info, disclaimer, copyright -----
with st.sidebar:
    st.image("c.png", use_column_width=True)
    st.title("Developer Details")
    st.write("Developed by: **Abdul S.**")
    st.write("@XevenSolutions")
    st.write("LinkedIn: [linkedin.com/in/kushikhlaq](https://www.linkedin.com/in/kushikhlaq/)")

    st.title("Disclaimer")
    st.write("This chatbot provides information on the Constitution of Pakistan in Urdu. "
             "Please note that the information may not be comprehensive or up to date. "
             "For official references, please consult legal professionals.")

    st.title("Copyright")
    st.write("© 2024 Abdul Samad. All rights reserved.")
127
+
# Streamlit app layout for user interaction
st.title("Urdu Chatbot - Constitution of Pakistan")
st.write("Chat with the Constitution of Pakistan in Urdu, either by typing or speaking.")

# Button to clear chat.
# NOTE(review): this button is rendered before the text_input widget below,
# so clear_chat() can reset st.session_state["input_text"] without tripping
# Streamlit's "modified after widget instantiated" error — confirm ordering
# if the layout is rearranged.
if st.button("Clear Chat"):
    clear_chat()  # Call the clear chat function
135
+
# Callback invoked when the text box content changes (user pressed Enter).
def handle_user_input():
    """Send the typed question to the QA chain and queue the spoken reply."""
    question = st.session_state["input_text"]
    if not question:
        return  # nothing typed — ignore the change event

    answer = invoke_chatbot(question)
    st.session_state["history"].append({"user": question, "bot": answer})

    # Queue Urdu TTS audio of the answer for autoplay on this rerun.
    st.session_state["audio_playback"] = speak_urdu(answer)

    # Empty the box again; writing a widget key is permitted inside a callback
    # because callbacks run before widgets are re-instantiated on rerun.
    st.session_state["input_text"] = ""
152
+
# Text input field with callback to handle user input.
# The "input_text" key already binds this widget to st.session_state, so the
# widget shows the session value automatically; passing value= as well made
# Streamlit warn that the default is ignored ("widget ... also had its value
# set via the Session State API"). Dropping value= keeps behavior identical
# and silences the warning.
st.text_input(
    "Type your question in Urdu",
    key="input_text",              # session-state binding
    on_change=handle_user_input,   # fires when the user submits new text
)
160
+
# Voice input: capture speech, answer it, and queue the spoken reply.
if st.button("Speak Now"):
    spoken_question = recognize_speech()
    if spoken_question:
        # Same pipeline as typed input: answer, log, synthesize audio.
        answer = invoke_chatbot(spoken_question)
        st.session_state["history"].append({"user": spoken_question, "bot": answer})
        st.session_state["audio_playback"] = speak_urdu(answer)
        st.session_state["voice_input"] = ""  # reset after processing
174
+
# Render the conversation, oldest first, as streamlit_chat bubbles.
for turn_no, turn in enumerate(st.session_state["history"]):
    message(turn["user"], is_user=True, key=f"user_{turn_no}")
    message(turn["bot"], key=f"bot_{turn_no}")
179
+
# Play the latest answer's audio once, then drop it so later reruns stay silent.
pending_audio = st.session_state["audio_playback"]
if pending_audio:
    autoplay_audio(pending_audio)
    st.session_state["audio_playback"] = None