# Urdu chatbot for the Constitution of Pakistan.
# Streamlit UI + LangChain RetrievalQA over a Qdrant vector store,
# with speech input (speech_recognition) and Urdu TTS output (gTTS).
# Standard library
import base64
import os
from io import BytesIO

# LangChain / retrieval stack
from langchain.chains import RetrievalQA
from langchain.prompts import PromptTemplate
from langchain_community.chat_models import ChatOpenAI
from langchain_community.embeddings import OpenAIEmbeddings
from langchain_qdrant import QdrantVectorStore

# Web UI and audio processing
import gradio as gr
import speech_recognition as sr
import streamlit as st
from gtts import gTTS
from streamlit_chat import message
# Credentials come from the environment; never hard-code secrets.
# NOTE(review): the original did os.environ['OPENAI_API_KEY'] =
# os.environ.get("OPENAI_API_KEY"), which raises TypeError when the
# variable is unset (environ values must be str, not None). The guarded
# form is a no-op when the key exists and safe when it does not.
_openai_key = os.environ.get("OPENAI_API_KEY")
if _openai_key is not None:
    os.environ["OPENAI_API_KEY"] = _openai_key
Qdrant_API_KEY = os.environ.get("Qdrant_API_KEY")

# Embedding model used to vectorize queries for similarity search.
embed_model = OpenAIEmbeddings()

# Qdrant cluster endpoint that hosts the pre-built constitution collection.
qdrant_url = "https://ee4d124d-d295-4df3-ad6b-47fe60d3f80d.europe-west3-0.gcp.cloud.qdrant.io:6333"

# Attach to the existing collection (document ingestion was done in a
# separate run, so no chunks are uploaded here).
vectorstore = QdrantVectorStore.from_existing_collection(
    embedding=embed_model,
    url=qdrant_url,
    api_key=Qdrant_API_KEY,
    collection_name="Pakistan_Constitution_eng",
)

# Deterministic chat model (temperature=0) for legal Q&A.
llm = ChatOpenAI(model_name="gpt-4o-mini", temperature=0)
# Prompt contract for the QA chain: answer in Urdu, cite exact articles
# and clauses, admit ignorance rather than invent, close with thanks.
template = """You are a legal assistant providing accurate and concise information based on the Constitution of Pakistan.
You will receive questions in Urdu language and you have to answer in Urdu language.
Use the following pieces of context to answer the question at the end.
Give accurate references to the articles and clauses.
If you don't know the answer, just say that you don't know, don't try to make up an answer.
Provide proper relevant citations or references to the exact articles or clauses in the Constitution.
Keep the answer as concise as possible. Always say "thanks!" in urdu at the end of the answer.
{context}
Question: {question}
Helpful Answer:"""

# Wrap the template so the chain can inject retrieved context and question.
QA_CHAIN_PROMPT = PromptTemplate(
    input_variables=["context", "question"],
    template=template,
)

# Retrieval-augmented QA chain: the retriever pulls relevant chunks from
# Qdrant and the LLM answers them using the prompt above.
qa_chain = RetrievalQA.from_chain_type(
    llm,
    retriever=vectorstore.as_retriever(),
    chain_type_kwargs={"prompt": QA_CHAIN_PROMPT},
)
def speak_urdu(text):
    """Synthesize Urdu speech for *text* and return it as base64-encoded MP3.

    The audio never touches disk: gTTS writes into an in-memory buffer,
    whose bytes are base64-encoded for embedding in an HTML <audio> tag.
    """
    buffer = BytesIO()
    gTTS(text=text, lang='ur').write_to_fp(buffer)
    return base64.b64encode(buffer.getvalue()).decode()
def recognize_speech():
    """Capture one microphone utterance and transcribe it as Urdu text.

    Returns the recognized text, or an English error message when the
    speech was unintelligible or the recognition service call failed.
    """
    recognizer = sr.Recognizer()
    with sr.Microphone() as mic:
        st.write("Listening...")  # tell the user the app is recording
        captured = recognizer.listen(mic)
    try:
        # Google Web Speech API, Urdu language model.
        return recognizer.recognize_google(captured, language="ur")
    except sr.UnknownValueError:
        return "Sorry, I couldn't understand your speech."
    except sr.RequestError:
        return "Error: Unable to process your request."
def invoke_chatbot(user_input):
    """Run the retrieval QA chain on *user_input* and return the answer text."""
    return qa_chain.invoke(user_input)["result"]
def autoplay_audio(audio_base64):
    """Embed a hidden, autoplaying <audio> element for the given base64 MP3.

    NOTE(review): HTML boolean attributes are true whenever present, so the
    original controls="false" actually *enabled* the controls; the attribute
    is simply omitted here (the element is hidden via display:none anyway).
    """
    audio_html = f"""
    <audio autoplay style="display:none;">
        <source src="data:audio/mp3;base64,{audio_base64}" type="audio/mp3">
    </audio>
    """
    # unsafe_allow_html is required for Streamlit to render raw HTML.
    st.markdown(audio_html, unsafe_allow_html=True)
# Seed session-state slots on first run so later code can read them freely.
_SESSION_DEFAULTS = {
    "history": [],           # list of {"user": ..., "bot": ...} chat turns
    "input_text": "",        # current text-box content
    "voice_input": "",       # last recognized speech
    "audio_playback": None,  # base64 MP3 queued for autoplay, if any
}
for _key, _default in _SESSION_DEFAULTS.items():
    if _key not in st.session_state:
        st.session_state[_key] = _default
def clear_chat():
    """Reset the conversation: history, typed input, and queued audio."""
    for key, blank in (("history", []), ("input_text", ""), ("audio_playback", None)):
        st.session_state[key] = blank
# Sidebar: branding, developer credits, disclaimer, and copyright notice.
with st.sidebar:
    st.image("c.png", use_column_width=True)
    st.title("Developer Details")
    st.write("Developed by: **Abdul S.**")
    st.write("@XevenSolutions")
    st.write("LinkedIn: [linkedin.com/in/kushikhlaq](https://www.linkedin.com/in/kushikhlaq/)")
    st.title("Disclaimer")
    st.write("This chatbot provides information on the Constitution of Pakistan in Urdu. "
             "Please note that the information may not be comprehensive or up to date. "
             "For official references, please consult legal professionals.")
    st.title("Copyright")
    st.write("© 2024 Abdul Samad. All rights reserved.")
# Main page header and chat-reset control.
st.title("Urdu Chatbot - Constitution of Pakistan")
st.write("Chat with the Constitution of Pakistan in Urdu, either by typing or speaking.")

if st.button("Clear Chat"):
    clear_chat()  # wipe history, input, and queued audio
def handle_user_input():
    """on_change callback for the text box: answer, record, voice, then clear.

    Reads the question from session state, appends the QA exchange to the
    chat history, queues Urdu TTS audio of the answer, and blanks the box.
    """
    question = st.session_state["input_text"]
    if not question:
        return
    answer = invoke_chatbot(question)
    st.session_state["history"].append({"user": question, "bot": answer})
    # Queue spoken audio of the answer for autoplay on the next rerun.
    st.session_state["audio_playback"] = speak_urdu(answer)
    # Clearing the widget key inside its own callback is the supported
    # way to empty the box after submission.
    st.session_state["input_text"] = ""
# Text input bound to session state; handle_user_input fires on submit.
# NOTE(review): the original also passed value=st.session_state["input_text"],
# which conflicts with the "input_text" key already present in session
# state — Streamlit warns and ignores the default. The key alone both
# supplies and persists the widget's value.
st.text_input(
    "Type your question in Urdu",
    key="input_text",
    on_change=handle_user_input,
)
# Voice input: capture speech, answer it, and queue the spoken reply.
if st.button("Speak Now"):
    spoken = recognize_speech()
    if spoken:
        reply = invoke_chatbot(spoken)
        st.session_state["history"].append({"user": spoken, "bot": reply})
        # Queue spoken audio of the answer for autoplay.
        st.session_state["audio_playback"] = speak_urdu(reply)
        st.session_state["voice_input"] = ""  # nothing pending after processing
# Render the conversation as chat bubbles via the streamlit_chat component.
for turn_no, turn in enumerate(st.session_state["history"]):
    message(turn["user"], is_user=True, key=f"user_{turn_no}")
    message(turn["bot"], key=f"bot_{turn_no}")

# Play the most recent answer aloud exactly once, then clear the queue so
# the same clip does not replay on subsequent reruns.
if st.session_state["audio_playback"]:
    autoplay_audio(st.session_state["audio_playback"])
    st.session_state["audio_playback"] = None