william4416 committed on
Commit
5812112
·
verified ·
1 Parent(s): aea0b6c

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +56 -36
app.py CHANGED
@@ -2,52 +2,72 @@ from transformers import AutoModelForCausalLM, AutoTokenizer
2
  import gradio as gr
3
  import torch
4
  import json
5
- import re
6
 
7
  title = "????AI ChatBot"
8
  description = "A State-of-the-Art Large-scale Pretrained Response generation model (DialoGPT)"
9
- examples = [["What courses under Engineering?"]]
10
 
11
  tokenizer = AutoTokenizer.from_pretrained("microsoft/DialoGPT-large")
12
  model = AutoModelForCausalLM.from_pretrained("microsoft/DialoGPT-large")
13
 
14
- # Load courses data from JSON file
15
- def load_courses(file_path):
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
16
  with open(file_path, 'r') as file:
17
- courses_data = json.load(file)
18
- return courses_data
19
 
20
- courses_data = load_courses('uts_courses.json')
 
 
21
 
22
- def predict(input, history=[]):
23
- # Check if the input is related to courses
24
- if re.match(r'what courses under (\w+)', input.strip().lower()):
25
- # Extract the field of interest (e.g., Engineering, Information Technology, etc.)
26
- match = re.match(r'what courses under (\w+)', input.strip().lower())
27
- field = match.group(1).capitalize() # Capitalize the field name
28
-
29
- if field in courses_data['courses']:
30
- # Get the list of courses for the specified field
31
- courses = courses_data['courses'][field]
32
- response = f"The available courses in {field} are: {', '.join(courses)}"
33
- else:
34
- response = "I'm sorry, I couldn't find any courses related to that field."
35
-
36
- return response, history
37
- else:
38
- # Tokenize the new input sentence
39
- new_user_input_ids = tokenizer.encode(input + tokenizer.eos_token, return_tensors="pt")
40
-
41
- # Append the new user input tokens to the chat history
42
- bot_input_ids = torch.cat([torch.LongTensor(history), new_user_input_ids], dim=-1)
43
-
44
- # Generate a response
45
- history = model.generate(bot_input_ids, max_length=4000, pad_token_id=tokenizer.eos_token_id).tolist()
46
-
47
- # Convert the tokens to text
48
- response = tokenizer.decode(history[0])
49
-
50
- return response, history
51
 
52
  gr.Interface(
53
  fn=predict,
 
2
  import gradio as gr
3
  import torch
4
  import json
 
5
 
6
  title = "????AI ChatBot"
7
  description = "A State-of-the-Art Large-scale Pretrained Response generation model (DialoGPT)"
8
+ examples = [["How are you?"]]
9
 
10
  tokenizer = AutoTokenizer.from_pretrained("microsoft/DialoGPT-large")
11
  model = AutoModelForCausalLM.from_pretrained("microsoft/DialoGPT-large")
12
 
13
# Static course catalogue: each faculty mapped to the degree programs it offers.
# Used by predict() to answer course questions without invoking the model.
course_info = {
    "courses": {
        "Engineering": ["Civil Engineering", "Mechanical Engineering", "Electrical Engineering", "Software Engineering", "etc."],
        "Information Technology": ["Computer Science", "Information Systems", "Cybersecurity", "Data Science", "etc."],
        "Business": ["Business Administration", "Accounting", "Finance", "Marketing", "Management", "etc."],
        "Health Sciences": ["Nursing", "Pharmacy", "Health Information Management", "Public Health", "etc."],
        "Design and Architecture": ["Architecture", "Industrial Design", "Visual Communication", "etc."],
        "Science": ["Environmental Science", "Biotechnology", "Chemistry", "Physics", "etc."],
        "Law": ["Law", "Legal Studies", "etc."],
        "Arts and Social Sciences": ["Communication", "Education", "International Studies", "Sociology", "etc."],
        "Built Environment": ["Urban Planning", "Construction Management", "Property Economics", "etc."],
        "Creative Industries": ["Animation", "Photography", "Creative Writing", "Film and Television", "etc."]
    }
}
28
+
29
def predict(input, history=[]):
    """Produce a chatbot reply for one user turn.

    Args:
        input: The user's message text.
        history: Flat list of token ids accumulated over previous turns.
            NOTE: the default list is never mutated in place here (it is only
            read and rebound), so the mutable-default pitfall does not bite;
            the signature is kept unchanged for the Gradio caller.

    Returns:
        A ``(response, history)`` tuple where ``response`` is a list of
        ``(user, bot)`` message pairs and ``history`` is the updated token-id
        list to feed back on the next call.
    """
    # Fast path: answer course questions from the static table, skipping the model.
    if "course" in input.lower():
        for subject in course_info["courses"]:
            if subject.lower() in input.lower():
                # Return a (user, bot) pair like the model branch does; the
                # original returned a bare list of course names, which does not
                # match the pair format the other branch produces.
                reply = f"The available courses in {subject} are: {', '.join(course_info['courses'][subject])}"
                return [(input, reply)], history

    # tokenize the new input sentence (EOS marks the end of the user turn)
    new_user_input_ids = tokenizer.encode(input + tokenizer.eos_token, return_tensors="pt")

    # append the new user input tokens to the chat history
    bot_input_ids = torch.cat([torch.LongTensor(history), new_user_input_ids], dim=-1)

    # generate a response
    history = model.generate(
        bot_input_ids, max_length=4000, pad_token_id=tokenizer.eos_token_id
    ).tolist()

    # Split the decoded conversation on the EOS token that separates turns.
    # BUG FIX: the original called .split("") which always raises
    # ValueError("empty separator") — this line could never succeed.
    response = tokenizer.decode(history[0]).split(tokenizer.eos_token)

    # pair consecutive messages into (user, bot) tuples
    response = [(response[i], response[i + 1]) for i in range(0, len(response) - 1, 2)]
    return response, history
52
+
53
def read_json_file(file_path):
    """Parse *file_path* as JSON and return the resulting Python object."""
    with open(file_path, 'r') as fh:
        return json.load(fh)
57
 
58
def main():
    """Print the contents of each expected JSON data file.

    Files that are missing or contain invalid JSON are reported and skipped,
    so one bad file no longer aborts the whole run (the original raised on
    the first FileNotFoundError).
    """
    # List of file names
    file_names = ['fileone.json', 'filesecond.json', 'filethird.json', 'filefourth.json', 'filefifth.json']

    # Read each JSON file and print its content
    for file_name in file_names:
        try:
            json_data = read_json_file(file_name)
        except FileNotFoundError:
            print(f"Warning: {file_name} not found; skipping.")
            continue
        except json.JSONDecodeError as err:
            print(f"Warning: {file_name} is not valid JSON ({err}); skipping.")
            continue
        print(f"Contents of {file_name}:")
        print(json_data)
        print()

if __name__ == "__main__":
    main()
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
71
 
72
  gr.Interface(
73
  fn=predict,