from gpt_index import GPTSimpleVectorIndex from langchain import OpenAI import gradio as gr from gradio import Interface, Textbox import sys import os import datetime import huggingface_hub from huggingface_hub import Repository from datetime import datetime import csv os.environ["OPENAI_API_KEY"] = os.environ['SECRET_CODE'] # Need to write to persistent dataset because cannot store temp data on spaces DATASET_REPO_URL = "https://huggingface.co/datasets/peterpull/MediatorBot" DATA_FILENAME = "data.txt" INDEX_FILENAME = "index_base_89MB.json" DATA_FILE = os.path.join("data", DATA_FILENAME) INDEX_FILE = os.path.join("data", INDEX_FILENAME) # we need a write access token. HF_TOKEN = os.environ.get("HF_TOKEN") print("HF TOKEN is none?", HF_TOKEN is None) print("HF hub ver", huggingface_hub.__version__) #Clones the distant repo to the local repo repo = Repository( local_dir='data', clone_from=DATASET_REPO_URL, use_auth_token=HF_TOKEN) print(f"Repo local_dir: {repo.local_dir}") print(f"Repo files: {os.listdir(repo.local_dir)}") def generate_text() -> str: with open(DATA_FILE) as file: text = "" for line in file: row_parts = line.strip().split(",") if len(row_parts) != 3: continue user, chatbot, time = row_parts text += f"Time: {time}\nUser: {user}\nChatbot: {chatbot}\n\n" return text if text else "No messages yet" def store_message(chatinput: str, chatresponse: str): if chatinput and chatresponse: with open(DATA_FILE, "a") as file: file.write(f"{datetime.now()},{chatinput},{chatresponse}\n") print(f"Wrote to datafile: {datetime.now()},{chatinput},{chatresponse}\n") # Push back to hub every 10th time the function is called if store_message.count_calls % 10 == 0: print("Pushing back to Hugging Face model hub") # Call the push_to_hub() function to push the changes to the hub push_to_hub(commit_message="Added new chat data") store_message.count_calls += 1 return generate_text() store_message.count_calls = 1 #initiases the count at one. We want to count how many messages stored before pushing back to repo. #gets the index file which is the context data def get_index(index_file_path): if os.path.exists(index_file_path): index_size = os.path.getsize(index_file_path) print(f"Size of {index_file_path}: {index_size} bytes") #let me know how big json file is. return GPTSimpleVectorIndex.load_from_disk(index_file_path) else: print(f"Error: '{index_file_path}' does not exist.") sys.exit() index = get_index(INDEX_FILE) # passes the prompt to the chatbot def chatbot(input_text, mentioned_person='Mediator John Haynes', confidence_threshold=0.5): prompt = f"You are {mentioned_person}. Answer this: {input_text}. Only reply from the contextual data, or say you don't know. At the end of your answer ask an insightful question." response = index.query(prompt, response_mode="default") store_message(input_text,response) # return the response return response.response with open('about.txt', 'r') as file: about = file.read() iface = Interface( fn=chatbot, inputs=Textbox("Enter your question"), outputs="text", title="AI Chatbot trained on J. Haynes mediation material, v0.5", description=about) iface.launch()