# NOTE(review): removed non-Python residue from the Hugging Face file viewer
# (status text, commit hashes and a line-number gutter) that had been pasted
# above the code and would be a syntax error.
from gpt_index import GPTSimpleVectorIndex
from llama_index.indices.query.query_transform.base import HyDEQueryTransform
import gradio as gr
from gradio import Interface, Textbox
import sys
import os
from datetime import datetime, timedelta
import pytz
import huggingface_hub
from huggingface_hub import Repository, HfApi
from datetime import datetime
import csv
# --- Environment & constants -------------------------------------------------
# The OpenAI key is stored as a Space secret named SECRET_CODE.
# NOTE(review): the bracket lookup raises KeyError at startup if the secret is
# missing — presumably intentional fail-fast behavior.
os.environ["OPENAI_API_KEY"] = os.environ['SECRET_CODE']
# Timestamps in the chat log are recorded in Sydney time (see store_message).
AUS_TIMEZONE = pytz.timezone('Australia/Sydney')
# Best practice is to use a persistent dataset
DATASET_REPO_URL = "https://huggingface.co/datasets/peterpull/MediatorBot"
DATA_FILENAME = "data.txt"        # chat-history log inside the dataset repo
INDEX_FILENAME = "index2.json"    # serialized GPTSimpleVectorIndex
DATA_FILE = os.path.join("data", DATA_FILENAME)
INDEX_FILE = os.path.join("data", INDEX_FILENAME)
# This client is used later to upload the chat history back to the dataset.
api=HfApi()
# An HF access token is needed — a read token should suffice here because we
# are only cloning the remote dataset repo into the local Space; uploads use
# the separate WRITE_TOKEN (see store_message).
HF_TOKEN = os.environ.get("HF_TOKEN")
print("HF TOKEN is none?", HF_TOKEN is None)
print("HF hub ver", huggingface_hub.__version__)
# Clone the remote dataset repo into the local "data" directory.
repo = Repository(
    local_dir='data',
    clone_from=DATASET_REPO_URL,
    use_auth_token=HF_TOKEN)
# Print file locations for debugging in the Space logs.
print(f"Repo local_dir: {repo.local_dir}")
print(f"Repo files: {os.listdir(repo.local_dir)}")
print (f"Index file:{INDEX_FILENAME}")
def generate_text(data_file=None) -> str:
    """Render the chat-history log as one display string.

    Parameters
    ----------
    data_file : str | None
        Path of the log file to read; defaults to the module-level DATA_FILE.

    Returns
    -------
    str
        One "Time/User/Chatbot" paragraph per well-formed row, or
        "No messages yet" when the log is missing or has no parseable rows.
    """
    path = DATA_FILE if data_file is None else data_file
    # Be robust against a fresh clone where the log file does not exist yet;
    # the original raised FileNotFoundError here.
    if not os.path.exists(path):
        return "No messages yet"
    # NOTE(review): this parser expects "user,chatbot,time" CSV rows, but
    # store_message() writes a multi-line, "-"-separated format — most lines
    # will be skipped here. Confirm which format is canonical.
    paragraphs = []
    with open(path) as file:
        for line in file:
            row_parts = line.strip().split(",")
            if len(row_parts) != 3:
                continue
            user_msg, bot_msg, ts = row_parts
            paragraphs.append(f"Time: {ts}\nUser: {user_msg}\nChatbot: {bot_msg}\n\n")
    # join() avoids the quadratic += string build of the original.
    return "".join(paragraphs) if paragraphs else "No messages yet"
def store_message(chatinput: str, chatresponse: str):
    """Append one user/bot exchange to the local log, push it to the HF
    dataset repo, and return the re-rendered chat history.

    Parameters
    ----------
    chatinput : str
        The user's message; falsy values skip logging entirely.
    chatresponse : str
        The chatbot's reply (a llama_index Response object is also accepted —
        it is formatted through the f-string below).

    Returns
    -------
    str
        The output of generate_text() over the full log.
    """
    if chatinput and chatresponse:
        # Take the timestamp directly in the Sydney timezone. The original
        # called datetime.now() (a *naive* local time, not UTC as its comment
        # claimed) and then astimezone(), which is only correct when the host
        # clock happens to be in the expected zone.
        aus_time = datetime.now(AUS_TIMEZONE)
        timestamp = aus_time.strftime("%Y-%m-%d %H:%M:%S")
        user_input = f"User: {chatinput}"
        chatbot_response = f"Chatbot: {chatresponse}"
        separator = "-" * 30
        message = f"{timestamp}\n{user_input}\n{chatbot_response}\n{separator}\n"
        with open(DATA_FILE, "a") as file:
            file.write(message)
        print(f"Wrote to datafile: {message}")
        # Push the updated log back to the dataset repo; this needs a token
        # with write access, distinct from the read token used for cloning.
        HF_WRITE_TOKEN = os.environ.get("WRITE_TOKEN")
        api.upload_file(
            path_or_fileobj=DATA_FILE,
            path_in_repo='data.txt',
            repo_id="peterpull/MediatorBot",
            repo_type="dataset",
            commit_message="Add new chat history",
            use_auth_token=HF_WRITE_TOKEN)
    return generate_text()
def get_index(index_file_path):
    """Load a GPTSimpleVectorIndex previously serialized to disk.

    Parameters
    ----------
    index_file_path : str
        Path to the serialized index JSON file.

    Returns
    -------
    GPTSimpleVectorIndex
        The deserialized index.

    Exits the process (non-zero status) when the file is absent.
    """
    # Guard clause: fail fast — the app is useless without its index.
    if not os.path.exists(index_file_path):
        print(f"Error: '{index_file_path}' does not exist.")
        # The original called sys.exit() with no argument, which exits with
        # status 0 (success) on an error path; signal failure explicitly.
        sys.exit(1)
    # Print the first 500 characters of the JSON for debugging.
    print_header_json_file(index_file_path)
    index_size = os.path.getsize(index_file_path)
    print(f"Size of {index_file_path}: {index_size} bytes")  # log index size
    return GPTSimpleVectorIndex.load_from_disk(index_file_path)
def print_header_json_file(filepath):
    """Dump a preview (the first 500 characters) of *filepath* to stdout."""
    with open(filepath, 'r') as handle:
        preview = handle.read()[:500]
    print("JSON FILE HEADER:")
    print(preview)
# Load the vector index once at startup; exits the process if it is missing.
index = get_index(INDEX_FILE)
# Shared conversation history; chatbot() appends (input, response) pairs.
conversation_history = []
# Passes the prompt to the chatbot.
def chatbot(input_text, history=None):
    """Query the index with the user's message and update the chat history.

    Parameters
    ----------
    input_text : str
        The user's message.
    history : list | None
        Conversation state from Gradio; falls back to the shared
        conversation_history when None.

    Returns
    -------
    tuple
        ([(input_text, response_text)], history) — the chatbot display pair
        and the updated state.
    """
    # Gradio passes the "state" input explicitly — None on the very first
    # call — so a mutable default never applied there and the original
    # crashed on history.append. Fall back to the shared history instead.
    if history is None:
        history = conversation_history
    hyde= HyDEQueryTransform(include_original=True)
    prompt = f"In character as John Haynes, please respond to: {input_text}. Only reply with contextual information or say you cannot find an answer. End with a reflective question."
    response = index.query(prompt, response_mode="default", verbose=True, query_transform=hyde)
    store_message(input_text,response)
    # Append the current input and response to the conversation history.
    history.append((input_text, response.response))
    # Return the response pair for display and the updated state.
    return [(input_text, response.response)], history
# --- Gradio UI ---------------------------------------------------------------
# Article text shown beneath the interface.
with open('about.txt', 'r') as file:
    about = file.read()
examples=[["What are three excellent questions to ask at intake?"],["How do you handle high conflict divorce cases?"],["Which metaphors do you steer parties away from in mediation? Which do you prefer?"]]
description="GPT3_Chatbot drawing on contextual mediation material, v0.6H"
title="The MediatorBot"
# NOTE(review): Textbox("Enter your question") passes the string positionally,
# which sets the textbox's initial *value*, not a label or placeholder —
# confirm the intended parameter for the installed Gradio version.
iface = Interface(
    fn=chatbot,
    inputs=[Textbox("Enter your question"), "state"],
    outputs=["chatbot", "state"],
    title=title,
    description=description,
    article=about,
    examples=examples)
# (Removed a stray " |" pasted after this call — it was a syntax error.)
iface.launch()