# src/utils/chat.py
import os
import tempfile
import streamlit as st
from langchain_community.vectorstores import DeepLake
from langchain_community.embeddings import OpenAIEmbeddings
from langchain_community.chat_models import ChatOpenAI
from langchain.chains import RetrievalQA
import openai
from streamlit_chat import message
from src.utils.process import process
from src.utils.load_and_split import load_docs, split_docs
import shutil
from langchain.cache import InMemoryCache
from langchain.globals import set_llm_cache
# Cache LLM responses in process memory: repeated identical prompts within
# this process are answered without another OpenAI API call.
set_llm_cache(InMemoryCache())
def run_chat_app():
    """Run the chat application using the Streamlit framework.

    Renders the sidebar configuration form (API credentials, repository URL,
    dataset name), triggers repository indexing on demand, and drives the
    chat transcript once the dataset is ready.
    """
    st.title("Code Weaver")  # App title

    # Seed the conversation history so the transcript renders on first load.
    if "generated" not in st.session_state:
        st.session_state["generated"] = ["I am ready to help you!"]
    if "past" not in st.session_state:
        st.session_state["past"] = ["Hello"]

    # Repository/dataset configuration gathered from the sidebar, plus a
    # human-readable processing status shown in the main pane.
    if "data" not in st.session_state:
        st.session_state["data"] = {
            "repo_url": None,
            "include_file_extensions": None,
            "activeloop_dataset_path": None,
            "repo_destination": None,
            "status": "Please Provide Data",
        }

    # Sidebar for API keys and data.
    with st.sidebar:
        st.header("Configuration")

        # Credentials are exported as environment variables because the
        # OpenAI and Activeloop clients read them from os.environ.
        openai_api_key = st.text_input("OpenAI API Key", type="password")
        if openai_api_key:
            os.environ["OPENAI_API_KEY"] = openai_api_key

        activeloop_token = st.text_input("Activeloop Token", type="password")
        if activeloop_token:
            os.environ["ACTIVELOOP_TOKEN"] = activeloop_token

        activeloop_username = st.text_input("Activeloop Username")
        if activeloop_username:
            os.environ["ACTIVELOOP_USERNAME"] = activeloop_username

        st.session_state["data"]["repo_url"] = st.text_input("GitHub Repository URL")

        file_extensions_input = st.text_input(
            "File Extensions (comma-separated, e.g., .py,.js)"
        ).strip()
        st.session_state["data"]["include_file_extensions"] = (
            [ext.strip() for ext in file_extensions_input.split(",")]
            if file_extensions_input
            else None
        )

        dataset_name = st.text_input("Dataset Name")
        if dataset_name:
            st.session_state["data"]["activeloop_dataset_path"] = (
                f"hub://{os.environ.get('ACTIVELOOP_USERNAME')}/{dataset_name}"
            )
        else:
            st.session_state["data"]["activeloop_dataset_path"] = None

        # NOTE(review): this value is never used — process_repo() clones into
        # a temporary directory instead. Kept for backward compatibility.
        st.session_state["data"]["repo_destination"] = "repos"

        if st.button("Process Repository"):
            data = st.session_state["data"]
            # Every input and credential must be present before indexing.
            inputs_ready = (
                data["repo_url"]
                and data["activeloop_dataset_path"]
                and os.environ.get("OPENAI_API_KEY")
                and os.environ.get("ACTIVELOOP_TOKEN")
                and os.environ.get("ACTIVELOOP_USERNAME")
            )
            if inputs_ready:
                data["status"] = "Processing Data"
                with st.spinner("Processing the repository, please wait"):
                    process_repo()
                data["status"] = "Ready to Chat!"
            else:
                data["status"] = "Missing Data"

    # Chat input and display area.
    st.write(st.session_state["data"]["status"])

    if st.session_state["data"]["status"] == "Ready to Chat!":
        user_input = get_text()
        if user_input:
            output = search_db(user_input)
            st.session_state.past.append(user_input)
            st.session_state.generated.append(output)

    if st.session_state["generated"]:
        # Render the alternating user/assistant transcript; keys must be
        # unique per message widget, hence the index-based suffixes.
        for i in range(len(st.session_state["generated"])):
            message(st.session_state["past"][i], is_user=True, key=str(i) + "_user")
            message(st.session_state["generated"][i], key=str(i))

    # Footer
    st.markdown(
        """
Made with ❤️ by Glorry Sibomana
""",
        unsafe_allow_html=True,
    )


def get_text():
    """Create a Streamlit input field and return the user's input."""
    input_text = st.text_input(
        "Enter your query:", key="input", label_visibility="hidden"
    )
    return input_text


def search_db(query):
    """Search for a response to the query in the DeepLake database.

    Args:
        query: The user's natural-language question.

    Returns:
        The answer string produced by the RetrievalQA chain.
    """
    # Set up embeddings and database. The store is opened read-only here;
    # writing happens only during process_repo().
    embeddings = OpenAIEmbeddings(model="text-embedding-ada-002")
    db = DeepLake(
        dataset_path=st.session_state["data"]["activeloop_dataset_path"],
        read_only=True,
        embedding_function=embeddings,
    )

    # Custom search parameters: cosine distance, a wide candidate pool
    # (fetch_k=100), and the top 10 documents handed to the LLM.
    retriever = db.as_retriever()
    retriever.search_kwargs["distance_metric"] = "cos"
    retriever.search_kwargs["fetch_k"] = 100
    retriever.search_kwargs["k"] = 10

    # Initialize chat model and the RetrievalQA chain around it.
    model = ChatOpenAI(model="gpt-3.5-turbo")
    qa = RetrievalQA.from_llm(model, retriever=retriever)
    return qa.run(query)


def process_repo():
    """Process the repository and save embeddings into the Deep Lake dataset.

    Clones into a temporary directory so the checkout is removed
    automatically once indexing finishes, regardless of errors.
    """
    with tempfile.TemporaryDirectory() as temp_dir:
        repo_destination = os.path.join(temp_dir, "repo_clone")
        data = st.session_state["data"]
        process(
            data["repo_url"],
            data["include_file_extensions"],
            data["activeloop_dataset_path"],
            repo_destination,
        )


if __name__ == "__main__":
    run_chat_app()