File size: 3,793 Bytes
22a9440
 
eb9bbc9
cf13932
22a9440
 
 
29ff848
 
 
 
eb9bbc9
22a9440
f43048b
29ff848
 
f7a821d
b5fa340
29ff848
b5fa340
96e634c
b5fa340
29ff848
 
9f81ae1
eb9bbc9
b5fa340
29ff848
fbce1e2
96e634c
 
29ff848
8a40cf4
 
9f81ae1
 
 
 
 
ba37f7c
9f81ae1
 
 
 
 
 
2dab601
 
08f7cb7
2dab601
 
 
 
96e634c
7a0a525
fbce1e2
9f81ae1
8a40cf4
f6b67b3
08f7cb7
 
f6b67b3
 
c052bcb
f6b67b3
16c492d
9f81ae1
0754fb2
7044cbd
d250469
29ff848
22a9440
 
16c492d
 
22a9440
 
 
 
eb9bbc9
e9db06b
9139588
08f7cb7
f807850
 
 
 
 
22a9440
75c051b
f807850
e9db06b
9139588
1cde52c
 
22a9440
cf13932
 
 
 
 
1cde52c
4623b74
22a9440
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
from gpt_index import GPTSimpleVectorIndex
from langchain import OpenAI
import gradio as gr
from gradio import Interface, Textbox
import sys
import os
import datetime
import huggingface_hub
from huggingface_hub import Repository
from datetime import datetime
import csv

# Wire the OpenAI key from the Space's secret store; fails fast with a
# KeyError if SECRET_CODE is not configured.
os.environ["OPENAI_API_KEY"] = os.environ['SECRET_CODE']

# Need to write to persistent dataset because cannot store temp data on spaces
DATASET_REPO_URL = "https://huggingface.co/datasets/peterpull/MediatorBot"
DATA_FILENAME = "data.txt"          # chat log, one "time,user,chatbot" line per exchange
INDEX_FILENAME = "index_base_89MB.json"  # pre-built GPTSimpleVectorIndex dump
DATA_FILE = os.path.join("data", DATA_FILENAME)
INDEX_FILE = os.path.join("data", INDEX_FILENAME)

# we need a write access token.
HF_TOKEN = os.environ.get("HF_TOKEN")
print("HF TOKEN is none?", HF_TOKEN is None)
print("HF hub ver", huggingface_hub.__version__)

#Clones the distant repo to the local repo
# NOTE(review): clones into ./data at import time — a network side effect on
# startup; confirm the Space always has outbound access and a valid token.
repo = Repository(
    local_dir='data', 
    clone_from=DATASET_REPO_URL, 
    use_auth_token=HF_TOKEN)

print(f"Repo local_dir: {repo.local_dir}")
print(f"Repo files: {os.listdir(repo.local_dir)}")

def generate_text() -> str:
    """Render the stored chat log as a human-readable transcript.

    Each line of DATA_FILE holds ``timestamp,user,chatbot`` — the order
    store_message() writes them in.  Malformed lines are skipped.

    Returns:
        The formatted transcript, or "No messages yet" when the log is empty.
    """
    text = ""
    with open(DATA_FILE) as file:
        for line in file:
            # The timestamp from datetime.now() never contains a comma, so
            # capping the split at 3 parts tolerates commas inside the
            # chatbot response instead of dropping the whole line.
            row_parts = line.strip().split(",", 2)
            if len(row_parts) != 3:
                continue
            # BUG FIX: the file stores time first; the original unpacked
            # "user, chatbot, time" and displayed the columns shuffled.
            time, user, chatbot = row_parts
            text += f"Time: {time}\nUser: {user}\nChatbot: {chatbot}\n\n"
    return text if text else "No messages yet"


def push_to_hub(commit_message):
    """Commit the chat-log file in the local dataset clone and push it to the Hub.

    Args:
        commit_message: Git commit message for this sync.
    """
    # NOTE(review): repo.local_dir is 'data' and DATA_FILE is 'data/data.txt';
    # Repository.git_add runs git inside local_dir, so this path may not
    # resolve — confirm whether DATA_FILENAME ('data.txt') is intended here.
    repo.git_add(DATA_FILE)  # Add the updated data file to the staged changes
    repo.git_commit(commit_message)  # Commit the changes
    # NOTE(review): Repository.push_to_hub normally performs its own
    # add/commit before pushing, and the token kwarg may not exist in the
    # installed huggingface_hub version — verify; repo.git_push() may be the
    # sufficient call after a manual commit.
    repo.push_to_hub(token=HF_TOKEN)  # Push the changes to the remote repository


def store_message(chatinput: str, chatresponse: str) -> str:
    """Append one user/chatbot exchange to DATA_FILE and sync it periodically.

    A line is only written when both *chatinput* and *chatresponse* are
    non-empty.  Every 2nd STORED message the data file is pushed back to the
    Hub dataset repo.

    Args:
        chatinput: The user's message.
        chatresponse: The chatbot's reply (str()-able).

    Returns:
        The full formatted transcript from generate_text().
    """
    if chatinput and chatresponse:
        # BUG FIX: take a single timestamp so the printed line matches the
        # written line (the original called datetime.now() twice).
        timestamp = datetime.now()
        with open(DATA_FILE, "a") as file:
            # NOTE(review): fields are unescaped; commas in chatinput or
            # newlines in chatresponse will corrupt the log format.
            file.write(f"{timestamp},{chatinput},{chatresponse}\n")
            print(f"Wrote to datafile: {timestamp},{chatinput},{chatresponse}\n")

        # Push back to hub every 2nd STORED message.  BUG FIX: the original
        # counted — and could push — even when nothing had been written.
        if store_message.count_calls % 2 == 0:
            print("Pushing back to Hugging Face model hub")
            # Call the push_to_hub() function to push the changes to the hub
            push_to_hub(commit_message="Added new chat data")
        store_message.count_calls += 1

    return generate_text()

store_message.count_calls = 1  # initialises the count at one; tracks how many messages were stored before pushing back to the repo.

#gets the index file which is the context data
def get_index(index_file_path):
    if os.path.exists(index_file_path):
        index_size = os.path.getsize(index_file_path)
        print(f"Size of {index_file_path}: {index_size} bytes") #let me know how big json file is.
        return GPTSimpleVectorIndex.load_from_disk(index_file_path)
    else:
        print(f"Error: '{index_file_path}' does not exist.")
        sys.exit()

index = get_index(INDEX_FILE)

# passes the prompt to the chatbot, queries the index, stores the output, returns the response
def chatbot(input_text, mentioned_person='Mediator John Haynes', confidence_threshold=0.5):
    """Answer a user question in the persona of *mentioned_person*.

    Builds a persona prompt, queries the vector index, logs the exchange via
    store_message(), and returns the response text.  ``confidence_threshold``
    is accepted for interface compatibility but not used by this implementation.
    """
    persona_prompt = (
        f"You are {mentioned_person}. Answer this: {input_text}. "
        "Only reply from the contextual data, or say you don't know. "
        "At the end of your answer ask an insightful question."
    )
    result = index.query(persona_prompt, response_mode="default")

    # Persist the exchange before handing the text back to the UI.
    store_message(input_text, result)

    return result.response


# Load the description text shown under the app title.
with open('about.txt', 'r') as file:
    about = file.read()

# NOTE(review): the first positional argument of gradio's Textbox sets the
# default *value*, not the label/placeholder — confirm "Enter your question"
# is meant to pre-fill the input box rather than label it.
iface = Interface(
    fn=chatbot,
    inputs=Textbox("Enter your question"),
    outputs="text",
    title="AI Chatbot trained on J. Haynes mediation material, v0.5",
    description=about)
                                         
# Start the Gradio server (blocking call).
iface.launch()