peterpull committed on
Commit
66bddd0
·
1 Parent(s): 6ead8e7

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +43 -19
app.py CHANGED
@@ -1,6 +1,6 @@
1
  from gpt_index import GPTSimpleVectorIndex
2
- from langchain import OpenAI
3
  import gradio as gr
 
4
  import sys
5
  import os
6
  import datetime
@@ -11,27 +11,34 @@ import csv
11
 
12
  os.environ["OPENAI_API_KEY"] = os.environ['SECRET_CODE']
13
 
14
- # Need to write to persistent dataset because cannot store temp data on spaces
15
  DATASET_REPO_URL = "https://huggingface.co/datasets/peterpull/MediatorBot"
16
  DATA_FILENAME = "data.txt"
17
- DATA_FILE = os.path.join("data", DATA_FILENAME)
18
 
19
- # I am guessing we need a write access token.
20
  HF_TOKEN = os.environ.get("HF_TOKEN")
21
  print("HF TOKEN is none?", HF_TOKEN is None)
22
  print("HF hub ver", huggingface_hub.__version__)
23
 
 
24
  repo = Repository(
25
- local_dir="data",
26
  clone_from=DATASET_REPO_URL,
27
  use_auth_token=HF_TOKEN)
28
 
 
 
 
 
 
 
29
 
30
  def generate_text() -> str:
31
  with open(DATA_FILE) as file:
32
  text = ""
33
  for line in file:
34
- row_parts = line.strip().split(";")
35
  if len(row_parts) != 3:
36
  continue
37
  user, chatbot, time = row_parts
@@ -42,21 +49,35 @@ def store_message(chatinput: str, chatresponse: str):
42
  if chatinput and chatresponse:
43
  with open(DATA_FILE, "a") as file:
44
  file.write(f"{datetime.now()},{chatinput},{chatresponse}\n")
45
-
46
- return generate_text()
 
47
 
48
- #gets the index file which is the context data
 
49
  def get_index(index_file_path):
50
  if os.path.exists(index_file_path):
 
 
 
 
 
51
  return GPTSimpleVectorIndex.load_from_disk(index_file_path)
52
  else:
53
  print(f"Error: '{index_file_path}' does not exist.")
54
  sys.exit()
55
 
 
 
 
 
 
 
 
 
56
  # passes the prompt to the chatbot
57
- def chatbot(input_text, mentioned_person='Mediator John Haynes'):
58
- index = get_index('./index/indexsmall.json')
59
- prompt = f"You are {mentioned_person}: {input_text}\n\n At the end of your answer ask a provocative question."
60
  response = index.query(prompt, response_mode="compact")
61
 
62
  store_message(input_text,response)
@@ -65,11 +86,14 @@ def chatbot(input_text, mentioned_person='Mediator John Haynes'):
65
  return response.response
66
 
67
 
 
 
68
 
69
- iface = gr.Interface(fn=chatbot,
70
- inputs=gr.inputs.Textbox("Enter your question"),
71
- outputs="text",
72
- title="AI Chatbot trained on J. Haynes mediation material, v0.1",
73
- description="test")
74
- iface.launch()
75
-
 
 
1
  from gpt_index import GPTSimpleVectorIndex
 
2
  import gradio as gr
3
+ from gradio import Interface, Textbox
4
  import sys
5
  import os
6
  import datetime
 
os.environ["OPENAI_API_KEY"] = os.environ['SECRET_CODE']

# Best practice is to use a persistent dataset
DATASET_REPO_URL = "https://huggingface.co/datasets/peterpull/MediatorBot"
DATA_FILENAME = "data.txt"
INDEX_FILENAME = "index2.json"

# we need a HF access token - read access should suffice because we are
# cloning the distant repo to the local space repo.
HF_TOKEN = os.environ.get("HF_TOKEN")
print("HF TOKEN is none?", HF_TOKEN is None)
print("HF hub ver", huggingface_hub.__version__)

# Clones the distant dataset repo into the local "data" directory.
repo = Repository(
    local_dir='data',
    clone_from=DATASET_REPO_URL,
    use_auth_token=HF_TOKEN)

# BUG FIX: this constant was bound to DATA_FILEP, but generate_text() and
# store_message() read DATA_FILE - the mismatch raised NameError at runtime.
DATA_FILE = os.path.join("data", DATA_FILENAME)
INDEX_FILE = os.path.join("data", INDEX_FILENAME)

# Debug output: confirm the clone landed and which index file we will load.
print(f"Repo local_dir: {repo.local_dir}")
print(f"Repo files: {os.listdir(repo.local_dir)}")
print(f"Index file:{INDEX_FILENAME}")
37
  def generate_text() -> str:
38
  with open(DATA_FILE) as file:
39
  text = ""
40
  for line in file:
41
+ row_parts = line.strip().split(",")
42
  if len(row_parts) != 3:
43
  continue
44
  user, chatbot, time = row_parts
 
49
  if chatinput and chatresponse:
50
  with open(DATA_FILE, "a") as file:
51
  file.write(f"{datetime.now()},{chatinput},{chatresponse}\n")
52
+ print(f"Wrote to datafile: {datetime.now()},{chatinput},{chatresponse}\n")
53
+
54
+ #need to find a way to push back to dataset repo
55
 
56
+ return generate_text()
57
+
58
def get_index(index_file_path):
    """Load a GPTSimpleVectorIndex from disk, terminating if the file is missing.

    Parameters:
        index_file_path: path to the serialized index JSON file.

    Returns:
        The loaded GPTSimpleVectorIndex.

    Exits:
        Terminates the process with a non-zero status when the file
        does not exist.
    """
    if os.path.exists(index_file_path):
        # print 500 characters of json header
        print_header_json_file(index_file_path)
        index_size = os.path.getsize(index_file_path)
        print(f"Size of {index_file_path}: {index_size} bytes")  # let me know how big json file is.
        # debug - this is where an error is occurring
        return GPTSimpleVectorIndex.load_from_disk(index_file_path)
    else:
        print(f"Error: '{index_file_path}' does not exist.")
        # BUG FIX: bare sys.exit() exits with status 0 (success) even though
        # this is a failure path; exit with a non-zero status instead.
        sys.exit(1)
 
70
def print_header_json_file(filepath):
    """Print the first 500 characters of a (JSON) file for debugging.

    Parameters:
        filepath: path of the file to peek at.
    """
    with open(filepath, 'r') as f:
        # Read only the 500-char prefix we print, instead of loading
        # the whole (potentially large) index file into memory.
        header = f.read(500)
    print("JSON FILE HEADER:")
    print(header)
+
76
+ index = get_index(INDEX_FILE)
77
+
78
  # passes the prompt to the chatbot
79
+ def chatbot(input_text, mentioned_person='Mediator John Haynes', confidence_threshold=0.5):
80
+ prompt = f"You are {mentioned_person}. Answer this: {input_text}. Reply from the contextual data or say you don't know. To finish, ask an insightful question."
 
81
  response = index.query(prompt, response_mode="compact")
82
 
83
  store_message(input_text,response)
 
86
  return response.response
87
 
88
 
89
# Load the app description shown in the UI from a local text file.
with open('about.txt', 'r') as about_file:
    about_text = about_file.read()

# Wire the chatbot function into a simple text-in / text-out Gradio UI.
iface = Interface(fn=chatbot,
                  inputs=Textbox("Enter your question"),
                  outputs="text",
                  title="AI Chatbot trained on J. Haynes mediation material, v0.5",
                  description=about_text)
iface.launch()