"""MediatorBot: a Gradio chatbot that answers in character as John Haynes,
backed by a GPTSimpleVectorIndex of mediation material.

Chat history is appended to a local data file cloned from, and pushed back
to, the Hugging Face dataset repo "peterpull/MediatorBot".
"""

from gpt_index import GPTSimpleVectorIndex
from llama_index.indices.query.query_transform.base import HyDEQueryTransform
import gradio as gr
from gradio import Interface, Textbox
import sys
import os
from datetime import datetime, timedelta
import pytz
import huggingface_hub
from huggingface_hub import Repository, HfApi
import csv

# The OpenAI key is stored in the Space secret SECRET_CODE.
os.environ["OPENAI_API_KEY"] = os.environ['SECRET_CODE']

AUS_TIMEZONE = pytz.timezone('Australia/Sydney')

# Best practice is to use a persistent dataset for the chat history.
DATASET_REPO_URL = "https://huggingface.co/datasets/peterpull/MediatorBot"
DATA_FILENAME = "data.txt"
INDEX_FILENAME = "index2.json"
DATA_FILE = os.path.join("data", DATA_FILENAME)
INDEX_FILE = os.path.join("data", INDEX_FILENAME)

# Used later to upload the chat history back to the dataset repo.
api = HfApi()

# We need an HF access token — read access suffices here because we are only
# cloning the remote dataset repo into the local Space directory.
HF_TOKEN = os.environ.get("HF_TOKEN")
print("HF TOKEN is none?", HF_TOKEN is None)
print("HF hub ver", huggingface_hub.__version__)

# Clone the remote dataset repo into the local "data" directory.
repo = Repository(
    local_dir='data',
    clone_from=DATASET_REPO_URL,
    use_auth_token=HF_TOKEN)

# Print file locations for debugging.
print(f"Repo local_dir: {repo.local_dir}")
print(f"Repo files: {os.listdir(repo.local_dir)}")
print(f"Index file:{INDEX_FILENAME}")


def generate_text() -> str:
    """Render the stored chat history as display text.

    Returns the formatted history, or "No messages yet" when no line parses.

    NOTE(review): this reader expects comma-separated "user,chatbot,time"
    rows, but store_message() below writes a multi-line block format, so
    most stored lines fail the 3-field check and are skipped — the two
    formats should be reconciled.
    """
    with open(DATA_FILE) as file:
        text = ""
        for line in file:
            row_parts = line.strip().split(",")
            if len(row_parts) != 3:
                continue  # skip lines that are not user,chatbot,time
            user, chatbot, time = row_parts
            text += f"Time: {time}\nUser: {user}\nChatbot: {chatbot}\n\n"
    return text if text else "No messages yet"


def store_message(chatinput: str, chatresponse: str):
    """Append one exchange to the data file and push it to the dataset repo.

    Does nothing (beyond returning the history) when either argument is
    empty. Returns the rendered chat history (see generate_text).
    """
    if chatinput and chatresponse:
        # datetime.now() is a naive local timestamp; astimezone() attaches
        # the local zone and converts it to Australia/Sydney.
        now = datetime.now()
        aus_time = now.astimezone(AUS_TIMEZONE)
        timestamp = aus_time.strftime("%Y-%m-%d %H:%M:%S")

        user_input = f"User: {chatinput}"
        chatbot_response = f"Chatbot: {chatresponse}"
        separator = "-" * 30
        message = f"{timestamp}\n{user_input}\n{chatbot_response}\n{separator}\n"

        with open(DATA_FILE, "a") as file:
            file.write(message)
        print(f"Wrote to datafile: {message}")

        # Push the updated history back to the dataset repo; this needs a
        # write-capable token (Space secret WRITE_TOKEN).
        HF_WRITE_TOKEN = os.environ.get("WRITE_TOKEN")
        api.upload_file(
            path_or_fileobj=DATA_FILE,
            path_in_repo='data.txt',
            repo_id="peterpull/MediatorBot",
            repo_type="dataset",
            commit_message="Add new chat history",
            use_auth_token=HF_WRITE_TOKEN)
    return generate_text()


def get_index(index_file_path):
    """Load the GPTSimpleVectorIndex from disk, or exit if the file is missing.

    Prints the first 500 characters of the JSON and the file size as a
    sanity check before loading.
    """
    if os.path.exists(index_file_path):
        print_header_json_file(index_file_path)
        index_size = os.path.getsize(index_file_path)
        print(f"Size of {index_file_path}: {index_size} bytes")
        loaded_index = GPTSimpleVectorIndex.load_from_disk(index_file_path)
        return loaded_index
    else:
        print(f"Error: '{index_file_path}' does not exist.")
        sys.exit()


def print_header_json_file(filepath):
    """Print the first 500 characters of a (JSON) file for inspection."""
    with open(filepath, 'r') as f:
        file_contents = f.read()
    print("JSON FILE HEADER:")
    print(file_contents[:500])


index = get_index(INDEX_FILE)

# Module-level conversation history; Gradio's "state" input also carries it.
conversation_history = []


def chatbot(input_text, history=conversation_history):
    """Query the index in character and return (chat pairs, updated history).

    NOTE(review): the mutable default aliases the module-level
    conversation_history list — all callers that omit `history` share it.
    Gradio normally supplies the state explicitly as the second input.
    """
    hyde = HyDEQueryTransform(include_original=True)
    prompt = f"In character as John Haynes, please respond to: {input_text}. Only reply with contextual information or say you cannot find an answer. End with a reflective question."
    response = index.query(prompt, response_mode="default", verbose=True, query_transform=hyde)
    # NOTE(review): this passes the Response object, not response.response;
    # store_message stringifies it in an f-string — confirm that yields the
    # intended text.
    store_message(input_text, response)
    # Append the current input and response to the conversation history.
    history.append((input_text, response.response))
    # Return the response and the updated conversation history.
    return [(input_text, response.response)], history


# "about" text is shown beneath the interface as the article.
with open('about.txt', 'r') as file:
    about = file.read()

examples = [
    ["What are three excellent questions to ask at intake?"],
    ["How do you handle high conflict divorce cases?"],
    ["Which metaphors do you steer parties away from in mediation? Which do you prefer?"],
]
description = "GPT3_Chatbot drawing on contextual mediation material, v0.6H"
title = "The MediatorBot"

iface = Interface(
    fn=chatbot,
    inputs=[Textbox("Enter your question"), "state"],
    outputs=["chatbot", "state"],
    title=title,
    description=description,
    article=about,
    examples=examples)

iface.launch()