Spaces:
Sleeping
Sleeping
Update app.py
Browse files
app.py
CHANGED
|
@@ -2,52 +2,72 @@ from transformers import AutoModelForCausalLM, AutoTokenizer
|
|
| 2 |
import gradio as gr
|
| 3 |
import torch
|
| 4 |
import json
|
| 5 |
-
import re
|
| 6 |
|
| 7 |
title = "????AI ChatBot"
|
| 8 |
description = "A State-of-the-Art Large-scale Pretrained Response generation model (DialoGPT)"
|
| 9 |
-
examples = [["
|
| 10 |
|
| 11 |
tokenizer = AutoTokenizer.from_pretrained("microsoft/DialoGPT-large")
|
| 12 |
model = AutoModelForCausalLM.from_pretrained("microsoft/DialoGPT-large")
|
| 13 |
|
| 14 |
-
#
|
| 15 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 16 |
with open(file_path, 'r') as file:
|
| 17 |
-
|
| 18 |
-
return
|
| 19 |
|
| 20 |
-
|
|
|
|
|
|
|
| 21 |
|
| 22 |
-
|
| 23 |
-
|
| 24 |
-
|
| 25 |
-
|
| 26 |
-
|
| 27 |
-
|
| 28 |
-
|
| 29 |
-
|
| 30 |
-
|
| 31 |
-
courses = courses_data['courses'][field]
|
| 32 |
-
response = f"The available courses in {field} are: {', '.join(courses)}"
|
| 33 |
-
else:
|
| 34 |
-
response = "I'm sorry, I couldn't find any courses related to that field."
|
| 35 |
-
|
| 36 |
-
return response, history
|
| 37 |
-
else:
|
| 38 |
-
# Tokenize the new input sentence
|
| 39 |
-
new_user_input_ids = tokenizer.encode(input + tokenizer.eos_token, return_tensors="pt")
|
| 40 |
-
|
| 41 |
-
# Append the new user input tokens to the chat history
|
| 42 |
-
bot_input_ids = torch.cat([torch.LongTensor(history), new_user_input_ids], dim=-1)
|
| 43 |
-
|
| 44 |
-
# Generate a response
|
| 45 |
-
history = model.generate(bot_input_ids, max_length=4000, pad_token_id=tokenizer.eos_token_id).tolist()
|
| 46 |
-
|
| 47 |
-
# Convert the tokens to text
|
| 48 |
-
response = tokenizer.decode(history[0])
|
| 49 |
-
|
| 50 |
-
return response, history
|
| 51 |
|
| 52 |
gr.Interface(
|
| 53 |
fn=predict,
|
|
|
|
import gradio as gr
import torch
import json

# UI strings for the Gradio interface.
# NOTE(review): "????" looks like mojibake from the scrape — the original
# title presumably contained emoji characters; confirm against the repo.
title = "????AI ChatBot"
description = "A State-of-the-Art Large-scale Pretrained Response generation model (DialoGPT)"
examples = [["How are you?"]]

# Load (downloading on first use) the DialoGPT-large tokenizer and model.
# These are module-level so they are created once per process, not per request.
tokenizer = AutoTokenizer.from_pretrained("microsoft/DialoGPT-large")
model = AutoModelForCausalLM.from_pretrained("microsoft/DialoGPT-large")
|
| 12 |
|
| 13 |
# Static course catalogue, keyed by field of study. predict() consults this
# table to answer questions containing the word "course" directly, without
# invoking the language model.
course_info = {
    "courses": {
        "Engineering": ["Civil Engineering", "Mechanical Engineering", "Electrical Engineering", "Software Engineering", "etc."],
        "Information Technology": ["Computer Science", "Information Systems", "Cybersecurity", "Data Science", "etc."],
        "Business": ["Business Administration", "Accounting", "Finance", "Marketing", "Management", "etc."],
        "Health Sciences": ["Nursing", "Pharmacy", "Health Information Management", "Public Health", "etc."],
        "Design and Architecture": ["Architecture", "Industrial Design", "Visual Communication", "etc."],
        "Science": ["Environmental Science", "Biotechnology", "Chemistry", "Physics", "etc."],
        "Law": ["Law", "Legal Studies", "etc."],
        "Arts and Social Sciences": ["Communication", "Education", "International Studies", "Sociology", "etc."],
        "Built Environment": ["Urban Planning", "Construction Management", "Property Economics", "etc."],
        "Creative Industries": ["Animation", "Photography", "Creative Writing", "Film and Television", "etc."]
    }
}
|
| 28 |
+
|
| 29 |
def predict(input, history=None):
    """Produce a chatbot reply for the user message *input*.

    Course-related questions are answered from the static ``course_info``
    catalogue; everything else is delegated to the DialoGPT model.

    Parameters
    ----------
    input : str
        The user's message.
    history : list, optional
        Token-id conversation history as returned by the previous call.
        Defaults to an empty history.  A ``None`` sentinel is used instead
        of a mutable ``[]`` default so state cannot leak between calls.

    Returns
    -------
    tuple
        ``(response, history)`` — *response* is either a list of course
        names (course queries) or a list of ``(user, bot)`` message pairs,
        and *history* is the updated token-id history.
    """
    if history is None:
        history = []

    # Answer course questions from the catalogue instead of the model.
    if "course" in input.lower():
        for subject in course_info["courses"]:
            if subject.lower() in input.lower():
                return course_info["courses"][subject], history

    # Tokenize the new user message, terminated by the EOS token.
    new_user_input_ids = tokenizer.encode(input + tokenizer.eos_token, return_tensors="pt")

    # Append the new tokens to the chat history.  On the first turn the
    # history is empty and torch.cat([torch.LongTensor([]), ...], dim=-1)
    # fails on recent PyTorch (1-D empty tensor vs 2-D input), so only
    # concatenate when there is prior history.
    if history:
        bot_input_ids = torch.cat([torch.LongTensor(history), new_user_input_ids], dim=-1)
    else:
        bot_input_ids = new_user_input_ids

    # Generate a response, capping the total context at 4000 tokens.
    history = model.generate(
        bot_input_ids, max_length=4000, pad_token_id=tokenizer.eos_token_id
    ).tolist()

    # Decode the whole conversation and split it into turns on the EOS
    # token.  (The previous split("") raised "ValueError: empty separator";
    # DialoGPT separates turns with tokenizer.eos_token.)
    response = tokenizer.decode(history[0]).split(tokenizer.eos_token)

    # Pair consecutive turns as (user, bot) tuples for the chat widget.
    response = [
        (response[i], response[i + 1]) for i in range(0, len(response) - 1, 2)
    ]
    return response, history
|
| 52 |
+
|
| 53 |
def read_json_file(file_path):
    """Read a JSON file and return the deserialized data.

    Parameters
    ----------
    file_path : str or os.PathLike
        Path to the JSON file to read.

    Returns
    -------
    object
        The parsed JSON content (dict, list, str, number, ...).

    Raises
    ------
    FileNotFoundError
        If *file_path* does not exist.
    json.JSONDecodeError
        If the file contents are not valid JSON.
    """
    # Explicit encoding so behaviour does not depend on the platform default.
    with open(file_path, 'r', encoding='utf-8') as file:
        return json.load(file)
|
| 57 |
|
| 58 |
def main():
    """Read a fixed list of JSON files and print each file's contents.

    Files are resolved relative to the current working directory.  A missing
    file is reported and skipped rather than aborting the whole run (the
    previous version crashed with FileNotFoundError on the first absent file).
    """
    # List of file names to dump.
    file_names = [
        'fileone.json',
        'filesecond.json',
        'filethird.json',
        'filefourth.json',
        'filefifth.json',
    ]

    # Read each JSON file and print its content.
    for file_name in file_names:
        try:
            json_data = read_json_file(file_name)
        except FileNotFoundError:
            print(f"{file_name} not found, skipping.")
            continue
        print(f"Contents of {file_name}:")
        print(json_data)
        print()


if __name__ == "__main__":
    main()
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 71 |
|
| 72 |
gr.Interface(
|
| 73 |
fn=predict,
|