william4416 committed on
Commit
aea0b6c
·
verified ·
1 Parent(s): ccacb35

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +35 -49
app.py CHANGED
@@ -2,65 +2,52 @@ from transformers import AutoModelForCausalLM, AutoTokenizer
2
  import gradio as gr
3
  import torch
4
  import json
5
-
6
 
7
  title = "????AI ChatBot"
8
  description = "A State-of-the-Art Large-scale Pretrained Response generation model (DialoGPT)"
9
- examples = [["What courses are available in Engineering?"]]
10
-
11
 
12
  tokenizer = AutoTokenizer.from_pretrained("microsoft/DialoGPT-large")
13
  model = AutoModelForCausalLM.from_pretrained("microsoft/DialoGPT-large")
14
- device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
15
-
16
- model.to(device)
17
 
18
  # Load courses data from JSON file
19
- with open("uts_courses.json", "r") as f:
20
- courses_data = json.load(f)
 
 
21
 
 
22
 
23
def predict(input, history=None):
    """Answer a course question from the catalogue, else reply with the model.

    Args:
        input: The user's message.
        history: Token ids of the conversation so far, in the nested-list
            form produced by ``model.generate(...).tolist()``. ``None`` or
            an empty list starts a new conversation.

    Returns:
        A ``(response, history)`` tuple. For a recognised course question
        ``response`` is a sentence listing the courses and ``history`` is
        returned unchanged. Otherwise ``response`` is a list of
        ``(user_turn, bot_turn)`` string pairs decoded from the updated
        ``history``.
    """
    # A shared mutable default would be the same list object on every call.
    history = [] if history is None else history

    # Lower-case the message once instead of once per catalogue entry.
    lowered = input.lower()
    if "courses" in lowered:
        catalogue = courses_data["courses"]
        for field in catalogue:
            if field.lower() in lowered:
                response = (
                    f"The available courses in {field} are: "
                    f"{', '.join(catalogue[field])}."
                )
                return response, history

    # Not a recognised course question: let the dialogue model answer.
    new_user_input_ids = tokenizer.encode(
        input + tokenizer.eos_token, return_tensors="pt"
    ).to(device)

    if history:
        # Token ids must be integer typed; state the dtype explicitly.
        past_ids = torch.tensor(history, dtype=torch.long).to(device)
        bot_input_ids = torch.cat([past_ids, new_user_input_ids], dim=-1)
    else:
        # Nothing to prepend on the first turn. Building a tensor from an
        # empty list would give a float tensor, not integer token ids.
        bot_input_ids = new_user_input_ids

    history = model.generate(
        bot_input_ids, max_length=4000, pad_token_id=tokenizer.eos_token_id
    ).tolist()

    # Every turn was encoded with a trailing end-of-sequence token, so that
    # token separates the turns in the decoded text. Splitting on an empty
    # string, as before, raises ValueError.
    turns = tokenizer.decode(history[0]).split(tokenizer.eos_token)
    # Pair consecutive turns; an unpaired trailing element is dropped.
    response = list(zip(turns[::2], turns[1::2]))
    return response, history
55
-
56
-
57
- def main():
58
- pass
59
-
60
-
61
- if __name__ == "__main__":
62
- main()
63
-
64
 
65
  gr.Interface(
66
  fn=predict,
@@ -70,5 +57,4 @@ gr.Interface(
70
  inputs=["text", "state"],
71
  outputs=["chatbot", "state"],
72
  theme="finlaymacklon/boxy_violet",
73
- share=True,
74
  ).launch()
 
2
  import gradio as gr
3
  import torch
4
  import json
5
+ import re
6
 
7
  title = "????AI ChatBot"
8
  description = "A State-of-the-Art Large-scale Pretrained Response generation model (DialoGPT)"
9
+ examples = [["What courses under Engineering?"]]
 
10
 
11
  tokenizer = AutoTokenizer.from_pretrained("microsoft/DialoGPT-large")
12
  model = AutoModelForCausalLM.from_pretrained("microsoft/DialoGPT-large")
 
 
 
13
 
14
  # Load courses data from JSON file
15
def load_courses(file_path):
    """Read the course catalogue stored as JSON at *file_path*.

    Args:
        file_path: Path of the JSON file to read.

    Returns:
        The decoded JSON document, exactly as ``json.load`` produces it.

    Raises:
        OSError: If the file cannot be opened.
        json.JSONDecodeError: If the file does not contain valid JSON.
    """
    # JSON text is UTF-8; do not rely on the platform's locale encoding,
    # which can mis-decode non-ASCII course names.
    with open(file_path, "r", encoding="utf-8") as file:
        return json.load(file)
19
 
20
+ courses_data = load_courses('uts_courses.json')
21
 
22
def predict(input, history=None):
    """Answer a "what courses under <field>" question, else use the model.

    Args:
        input: The user's message.
        history: Token ids of the conversation so far, in the nested-list
            form produced by ``model.generate(...).tolist()``. ``None`` or
            an empty list starts a new conversation.

    Returns:
        A ``(response, history)`` tuple. ``response`` is a string. For a
        course question ``history`` is returned unchanged; otherwise it is
        the token ids of the whole conversation including the new reply.
    """
    # A shared mutable default would be the same list object on every call.
    history = [] if history is None else history

    # Match once and reuse the result. The capture allows several words so
    # that fields such as "Information Technology" can be asked for.
    match = re.match(
        r"what courses under\s+(\w[\w &/-]*)", input.strip(), re.IGNORECASE
    )
    if match:
        catalogue = courses_data["courses"]
        phrase = match.group(1).strip()
        # Try the full phrase first, then only its first word, which is
        # what the previous single-word pattern matched.
        candidates = (phrase.casefold(), phrase.split()[0].casefold())
        field = next(
            (
                name
                for wanted in candidates
                for name in catalogue
                if name.casefold() == wanted
            ),
            None,
        )
        if field is None:
            response = "I'm sorry, I couldn't find any courses related to that field."
        else:
            response = (
                f"The available courses in {field} are: "
                f"{', '.join(catalogue[field])}"
            )
        return response, history

    # Not a course question: let the dialogue model answer.
    new_user_input_ids = tokenizer.encode(
        input + tokenizer.eos_token, return_tensors="pt"
    )

    if history:
        bot_input_ids = torch.cat(
            [torch.LongTensor(history), new_user_input_ids], dim=-1
        )
    else:
        # First turn: there are no earlier token ids to prepend.
        bot_input_ids = new_user_input_ids

    # NOTE(review): max_length=4000 may exceed the model's context window
    # in long conversations - confirm against the model configuration.
    history = model.generate(
        bot_input_ids, max_length=4000, pad_token_id=tokenizer.eos_token_id
    ).tolist()

    # NOTE(review): the interface routes this value to a "chatbot" output,
    # which presumably expects (user, bot) pairs rather than one string -
    # confirm against the Gradio version in use.
    response = tokenizer.decode(history[0])
    return response, history
 
 
 
 
 
 
 
 
 
 
 
 
51
 
52
  gr.Interface(
53
  fn=predict,
 
57
  inputs=["text", "state"],
58
  outputs=["chatbot", "state"],
59
  theme="finlaymacklon/boxy_violet",
 
60
  ).launch()