peterpull committed on
Commit
eace261
·
1 Parent(s): da05cd2

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +20 -44
app.py CHANGED
@@ -1,6 +1,6 @@
1
- from llama_index import GPTSimpleVectorIndex
 
2
  import gradio as gr
3
- from gradio import Interface, Textbox
4
  import sys
5
  import os
6
  import datetime
@@ -11,34 +11,27 @@ import csv
11
 
12
  os.environ["OPENAI_API_KEY"] = os.environ['SECRET_CODE']
13
 
14
- # Best practice is to use a persistent dataset
15
  DATASET_REPO_URL = "https://huggingface.co/datasets/peterpull/MediatorBot"
16
  DATA_FILENAME = "data.txt"
17
- INDEX_FILENAME = "index2.json"
18
 
19
- # we need a HF access token - read I think suffices because we are cloning the distant repo to the local space repo.
20
  HF_TOKEN = os.environ.get("HF_TOKEN")
21
  print("HF TOKEN is none?", HF_TOKEN is None)
22
  print("HF hub ver", huggingface_hub.__version__)
23
 
24
- #Clones the distant repo to the local repo
25
  repo = Repository(
26
- local_dir='data',
27
  clone_from=DATASET_REPO_URL,
28
  use_auth_token=HF_TOKEN)
29
 
30
- DATA_FILEP = os.path.join("data", DATA_FILENAME)
31
- INDEX_FILE = os.path.join("data", INDEX_FILENAME)
32
-
33
- print(f"Repo local_dir: {repo.local_dir}")
34
- print(f"Repo files: {os.listdir(repo.local_dir)}")
35
- print (f"Index file:{INDEX_FILENAME}")
36
 
37
  def generate_text() -> str:
38
  with open(DATA_FILE) as file:
39
  text = ""
40
  for line in file:
41
- row_parts = line.strip().split(",")
42
  if len(row_parts) != 3:
43
  continue
44
  user, chatbot, time = row_parts
@@ -49,35 +42,21 @@ def store_message(chatinput: str, chatresponse: str):
49
  if chatinput and chatresponse:
50
  with open(DATA_FILE, "a") as file:
51
  file.write(f"{datetime.now()},{chatinput},{chatresponse}\n")
52
- print(f"Wrote to datafile: {datetime.now()},{chatinput},{chatresponse}\n")
53
-
54
- #need to find a way to push back to dataset repo
55
-
56
- return generate_text()
57
 
 
 
 
58
  def get_index(index_file_path):
59
  if os.path.exists(index_file_path):
60
- #print 500 characters of json header
61
- print_header_json_file(index_file_path)
62
- index_size = os.path.getsize(index_file_path)
63
- print(f"Size of {index_file_path}: {index_size} bytes") #let me know how big json file is.
64
- #debug - this is where an error is occurring
65
  return GPTSimpleVectorIndex.load_from_disk(index_file_path)
66
  else:
67
  print(f"Error: '{index_file_path}' does not exist.")
68
  sys.exit()
69
 
70
- def print_header_json_file(filepath):
71
- with open(filepath, 'r') as f:
72
- file_contents = f.read()
73
- print ("JSON FILE HEADER:")
74
- print(file_contents[:500]) # print only the first 500 characters
75
-
76
- index = get_index(INDEX_FILE)
77
-
78
  # passes the prompt to the chatbot
79
- def chatbot(input_text, mentioned_person='Mediator John Haynes', confidence_threshold=0.5):
80
- prompt = f"You are {mentioned_person}. Answer this: {input_text}. Reply from the contextual data or say you don't know. To finish, ask an insightful question."
 
81
  response = index.query(prompt, response_mode="compact")
82
 
83
  store_message(input_text,response)
@@ -86,14 +65,11 @@ def chatbot(input_text, mentioned_person='Mediator John Haynes', confidence_thre
86
  return response.response
87
 
88
 
89
- with open('about.txt', 'r') as file:
90
- about = file.read()
91
 
92
- iface = Interface(
93
- fn=chatbot,
94
- inputs=Textbox("Enter your question"),
95
- outputs="text",
96
- title="AI Chatbot trained on J. Haynes mediation material, v0.5",
97
- description=about)
98
-
99
- iface.launch()
 
1
+ from gpt_index import GPTSimpleVectorIndex
2
+ from langchain import OpenAI
3
  import gradio as gr
 
4
  import sys
5
  import os
6
  import datetime
 
11
 
12
  os.environ["OPENAI_API_KEY"] = os.environ['SECRET_CODE']
13
 
14
+ # Need to write to persistent dataset because cannot store temp data on spaces
15
  DATASET_REPO_URL = "https://huggingface.co/datasets/peterpull/MediatorBot"
16
  DATA_FILENAME = "data.txt"
17
+ DATA_FILE = os.path.join("data", DATA_FILENAME)
18
 
19
+ # I am guessing we need a write access token.
20
  HF_TOKEN = os.environ.get("HF_TOKEN")
21
  print("HF TOKEN is none?", HF_TOKEN is None)
22
  print("HF hub ver", huggingface_hub.__version__)
23
 
 
24
  repo = Repository(
25
+ local_dir="data",
26
  clone_from=DATASET_REPO_URL,
27
  use_auth_token=HF_TOKEN)
28
 
 
 
 
 
 
 
29
 
30
  def generate_text() -> str:
31
  with open(DATA_FILE) as file:
32
  text = ""
33
  for line in file:
34
+ row_parts = line.strip().split(";")
35
  if len(row_parts) != 3:
36
  continue
37
  user, chatbot, time = row_parts
 
42
  if chatinput and chatresponse:
43
  with open(DATA_FILE, "a") as file:
44
  file.write(f"{datetime.now()},{chatinput},{chatresponse}\n")
 
 
 
 
 
45
 
46
+ return generate_text()
47
+
48
+ #gets the index file which is the context data
49
  def get_index(index_file_path):
50
  if os.path.exists(index_file_path):
 
 
 
 
 
51
  return GPTSimpleVectorIndex.load_from_disk(index_file_path)
52
  else:
53
  print(f"Error: '{index_file_path}' does not exist.")
54
  sys.exit()
55
 
 
 
 
 
 
 
 
 
56
  # passes the prompt to the chatbot
57
+ def chatbot(input_text, mentioned_person='Mediator John Haynes'):
58
+ index = get_index('./index/indexsmall.json')
59
+ prompt = f"You are {mentioned_person}: {input_text}\n\n At the end of your answer ask a provocative question."
60
  response = index.query(prompt, response_mode="compact")
61
 
62
  store_message(input_text,response)
 
65
  return response.response
66
 
67
 
 
 
68
 
69
+ iface = gr.Interface(fn=chatbot,
70
+ inputs=gr.inputs.Textbox("Enter your question"),
71
+ outputs="text",
72
+ title="AI Chatbot trained on J. Haynes mediation material, v0.1",
73
+ description="test")
74
+ iface.launch()
75
+