Ajit Panday committed on
Commit
4dcbc6c
·
1 Parent(s): 0680865

Initial commit: Customer Support Chatbot with DialoGPT-medium

Browse files
Files changed (2) hide show
  1. car_service_data.jsonl +102 -0
  2. train.py +43 -13
car_service_data.jsonl ADDED
@@ -0,0 +1,102 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {"input": "I need an oil change", "response": "Sure, when would you like to schedule your oil change?"}
2
+ {"input": "Book a tire rotation for Friday", "response": "I can book your tire rotation for Friday. What time works for you?"}
3
+ {"input": "Can you check my brakes?", "response": "Yes, we can check your brakes. When would you like to come in?"}
4
+ {"input": "I want a car wash tomorrow", "response": "Great, I can schedule a car wash for tomorrow. What time suits you?"}
5
+ {"input": "How much for an oil change?", "response": "An oil change costs $40-$60, depending on your car. Want to book one?"}
6
+ {"input": "Schedule a tune-up next week", "response": "I’ll book a tune-up for next week. Which day and time work best?"}
7
+ {"input": "My car needs new tires", "response": "We can replace your tires. When would you like to bring your car in?"}
8
+ {"input": "Can I get a service today?", "response": "Let me check availability. What service do you need today?"}
9
+ {"input": "I need a battery replacement", "response": "We can replace your battery. When would you like to schedule it?"}
10
+ {"input": "What’s a full service?", "response": "A full service includes oil change, tire check, and more. Want to book one?"}
11
+ {"input": "Fix my AC on Monday", "response": "I’ll schedule an AC repair for Monday. What time works?"}
12
+ {"input": "How much for new brakes?", "response": "Brake replacement is $100-$200. Want to book it?"}
13
+ {"input": "I need an alignment", "response": "We can do an alignment. When would you like to come in?"}
14
+ {"input": "Book a service for Saturday", "response": "What service would you like to book for Saturday?"}
15
+ {"input": "Check my engine light", "response": "We can check your engine light. When are you available?"}
16
+ {"input": "How long for an oil change?", "response": "An oil change takes about 30 minutes. Want to schedule one?"}
17
+ {"input": "I need new wipers", "response": "We can replace your wipers. When would you like to stop by?"}
18
+ {"input": "Can you do a detailing?", "response": "Yes, we offer detailing. When would you like to schedule it?"}
19
+ {"input": "Book a brake job for next Tuesday", "response": "I’ll book your brake job for next Tuesday. What time?"}
20
+ {"input": "What’s included in a tune-up?", "response": "A tune-up includes spark plugs, filters, and more. Want to book one?"}
21
+ {"input": "Replace my spark plugs", "response": "We can replace your spark plugs. When would you like to come in?"}
22
+ {"input": "Book a transmission check for Wednesday", "response": "I’ll book a transmission check for Wednesday. What time?"}
23
+ {"input": "I need a headlight fixed", "response": "We can fix your headlight. When would you like to schedule it?"}
24
+ {"input": "How much for a tire rotation?", "response": "A tire rotation costs $20-$30. Want to book one?"}
25
+ {"input": "Schedule an inspection for this weekend", "response": "I’ll book an inspection for this weekend. Saturday or Sunday?"}
26
+ {"input": "My car’s overheating", "response": "We can check that for you. When can you bring it in?"}
27
+ {"input": "Can you top up my fluids?", "response": "Yes, we can top up your fluids. When would you like to come by?"}
28
+ {"input": "Book an oil change for next Thursday", "response": "I’ll book your oil change for next Thursday. What time?"}
29
+ {"input": "How long for a brake job?", "response": "A brake job takes about 1-2 hours. Want to schedule one?"}
30
+ {"input": "I need a wheel balancing", "response": "We can balance your wheels. When would you like to schedule it?"}
31
+ {"input": "Fix my muffler next Friday", "response": "I’ll schedule a muffler repair for next Friday. What time?"}
32
+ {"input": "How much for a battery replacement?", "response": "A battery replacement costs $80-$120. Want to book it?"}
33
+ {"input": "I need a belt replaced", "response": "We can replace your belt. When would you like to come in?"}
34
+ {"input": "Book a service for Monday morning", "response": "What service would you like to book for Monday morning?"}
35
+ {"input": "Check my tire pressure", "response": "We can check your tire pressure. When are you available?"}
36
+ {"input": "How long for a car wash?", "response": "A car wash takes about 20 minutes. Want to schedule one?"}
37
+ {"input": "I need a new air filter", "response": "We can replace your air filter. When would you like to stop by?"}
38
+ {"input": "Can you do an oil change this afternoon?", "response": "Let me check availability for this afternoon. Any specific time?"}
39
+ {"input": "Book a tire change for next Wednesday", "response": "I’ll book your tire change for next Wednesday. What time?"}
40
+ {"input": "What’s included in an inspection?", "response": "An inspection covers brakes, tires, and more. Want to book one?"}
41
+ {"input": "I need my brakes fixed", "response": "We can fix your brakes. When would you like to schedule it?"}
42
+ {"input": "Book an AC check for tomorrow", "response": "I’ll book an AC check for tomorrow. What time works for you?"}
43
+ {"input": "How much for a wheel alignment?", "response": "A wheel alignment costs $50-$80. Want to book one?"}
44
+ {"input": "Schedule a detailing for Saturday", "response": "I’ll book a detailing for Saturday. What time suits you?"}
45
+ {"input": "My engine’s making a noise", "response": "We can check your engine. When can you bring it in?"}
46
+ {"input": "Can you replace my headlights?", "response": "Yes, we can replace your headlights. When would you like to come by?"}
47
+ {"input": "Book a tune-up for next Monday", "response": "I’ll book your tune-up for next Monday. What time?"}
48
+ {"input": "How long for a tire rotation?", "response": "A tire rotation takes about 30 minutes. Want to schedule one?"}
49
+ {"input": "I need a new battery tomorrow", "response": "We can replace your battery tomorrow. What time works?"}
50
+ {"input": "Can you fix my wipers today?", "response": "Let me check availability for today. Any preferred time?"}
51
+ {"input": "Book an oil change for this Friday", "response": "I’ll book your oil change for this Friday. What time?"}
52
+ {"input": "What does a brake job cost?", "response": "A brake job costs $100-$200. Want to schedule one?"}
53
+ {"input": "I need my tires rotated", "response": "We can rotate your tires. When would you like to come in?"}
54
+ {"input": "Schedule a car wash for next Tuesday", "response": "I’ll book a car wash for next Tuesday. What time?"}
55
+ {"input": "My AC isn’t cooling", "response": "We can check your AC. When can you bring it in?"}
56
+ {"input": "Can you check my fluids?", "response": "Yes, we can check your fluids. When would you like to stop by?"}
57
+ {"input": "Book a brake check for Wednesday", "response": "I’ll book a brake check for Wednesday. What time?"}
58
+ {"input": "How much for a tune-up?", "response": "A tune-up costs $80-$150. Want to book one?"}
59
+ {"input": "I need an oil change next week", "response": "We can schedule your oil change for next week. Which day?"}
60
+ {"input": "Schedule a tire repair for tomorrow", "response": "I’ll book a tire repair for tomorrow. What time suits you?"}
61
+ {"input": "My car needs a check-up", "response": "We can do a full check-up. When would you like to come in?"}
62
+ {"input": "Can you fix my transmission?", "response": "Yes, we can check your transmission. When are you available?"}
63
+ {"input": "Book a detailing for next Thursday", "response": "I’ll book a detailing for next Thursday. What time?"}
64
+ {"input": "How long for a battery replacement?", "response": "A battery replacement takes about 30 minutes. Want to book?"}
65
+ {"input": "I need new spark plugs tomorrow", "response": "We can replace your spark plugs tomorrow. What time?"}
66
+ {"input": "Can you do a wheel alignment today?", "response": "Let me check availability for today. Any preferred time?"}
67
+ {"input": "Book an inspection for next Friday", "response": "I’ll book an inspection for next Friday. What time?"}
68
+ {"input": "What’s the cost of a car wash?", "response": "A car wash costs $15-$25. Want to schedule one?"}
69
+ {"input": "I need my brakes checked tomorrow", "response": "We can check your brakes tomorrow. What time works?"}
70
+ {"input": "Schedule an oil change for Saturday", "response": "I’ll book your oil change for Saturday. What time?"}
71
+ {"input": "My tires are flat", "response": "We can fix your tires. When can you bring your car in?"}
72
+ {"input": "Can you replace my air filter?", "response": "Yes, we can replace your air filter. When would you like to come?"}
73
+ {"input": "Book a tune-up for this Wednesday", "response": "I’ll book a tune-up for this Wednesday. What time?"}
74
+ {"input": "How much for an alignment?", "response": "An alignment costs $50-$80. Want to book one?"}
75
+ {"input": "I need a car wash next Monday", "response": "We can schedule a car wash for next Monday. What time?"}
76
+ {"input": "Schedule a brake job for tomorrow", "response": "I’ll book a brake job for tomorrow. What time suits you?"}
77
+ {"input": "My engine light’s on", "response": "We can check your engine light. When can you bring it in?"}
78
+ {"input": "Can you fix my headlights today?", "response": "Let me check availability for today. Any preferred time?"}
79
+ {"input": "Book an oil change for next Tuesday", "response": "I’ll book your oil change for next Tuesday. What time?"}
80
+ {"input": "How long for a detailing?", "response": "Detailing takes about 2-3 hours. Want to schedule one?"}
81
+ {"input": "I need new wipers next week", "response": "We can replace your wipers next week. Which day works?"}
82
+ {"input": "Can you check my battery?", "response": "Yes, we can check your battery. When would you like to stop by?"}
83
+ {"input": "Book a tire rotation for this Thursday", "response": "I’ll book a tire rotation for this Thursday. What time?"}
84
+ {"input": "What’s the cost of new tires?", "response": "New tires cost $200-$400, depending on size. Want to book?"}
85
+ {"input": "I need an AC repair tomorrow", "response": "We can repair your AC tomorrow. What time works for you?"}
86
+ {"input": "Schedule a check-up for next Wednesday", "response": "I’ll book a check-up for next Wednesday. What time?"}
87
+ {"input": "My car’s making a weird sound", "response": "We can diagnose that for you. When can you bring it in?"}
88
+ {"input": "Can you do a transmission check?", "response": "Yes, we can check your transmission. When are you available?"}
89
+ {"input": "Book a car wash for this Sunday", "response": "I’ll book a car wash for this Sunday. What time suits you?"}
90
+ {"input": "How much for a headlight replacement?", "response": "Headlight replacement costs $30-$50 each. Want to book?"}
91
+ {"input": "I need my oil changed tomorrow", "response": "We can change your oil tomorrow. What time works?"}
92
+ {"input": "Schedule a tire rotation for next Monday", "response": "I’ll book a tire rotation for next Monday. What time?"}
93
+ {"input": "My brakes are squeaking", "response": "We can check your brakes. When can you bring your car in?"}
94
+ {"input": "Can you replace my battery today?", "response": "Let me check availability for today. Any preferred time?"}
95
+ {"input": "Book a detailing for this Friday", "response": "I’ll book a detailing for this Friday. What time?"}
96
+ {"input": "How long for an inspection?", "response": "An inspection takes about 1 hour. Want to schedule one?"}
97
+ {"input": "I need a new muffler", "response": "We can replace your muffler. When would you like to come in?"}
98
+ {"input": "Schedule an oil change for this Tuesday", "response": "I’ll book your oil change for this Tuesday. What time?"}
99
+ {"input": "My car won’t start", "response": "We can check that for you. When can you bring it in?"}
100
+ {"input": "Can you fix my AC this week?", "response": "Yes, we can fix your AC this week. Which day works?"}
101
+ {"input": "Book a brake job for next Thursday", "response": "I’ll book a brake job for next Thursday. What time?"}
102
+ {"input": "How much for a full service?", "response": "A full service costs $150-$250. Want to book one?"}
train.py CHANGED
@@ -1,29 +1,52 @@
1
  import torch
2
  from transformers import AutoModelForCausalLM, AutoTokenizer, TrainingArguments, Trainer, DataCollatorForLanguageModeling
3
- from datasets import load_dataset
4
  import numpy as np
5
  from typing import Dict, List
6
  import os
 
7
 
8
  def load_and_prepare_data():
9
- # Load the dataset
10
- dataset = load_dataset("Victorano/customer-support-1k")
 
 
 
 
 
 
 
 
 
11
 
12
  # Load tokenizer
13
  tokenizer = AutoTokenizer.from_pretrained("microsoft/DialoGPT-medium")
14
 
15
  # Function to format conversations
16
  def format_conversation(example):
17
- # Combine question and answer into a single conversation
18
- conversation = f"Customer: {example['question']}\nSupport: {example['answer']}"
 
 
 
 
19
  return {"text": conversation}
20
 
21
- # Apply formatting to both train and test sets
22
- formatted_dataset = dataset.map(
 
 
 
 
 
23
  format_conversation,
24
- remove_columns=dataset["train"].column_names
25
  )
26
 
 
 
 
 
27
  # Tokenize the dataset
28
  def tokenize_function(examples):
29
  return tokenizer(
@@ -34,11 +57,18 @@ def load_and_prepare_data():
34
  return_tensors="pt"
35
  )
36
 
37
- tokenized_dataset = formatted_dataset.map(
38
- tokenize_function,
39
- batched=True,
40
- remove_columns=formatted_dataset["train"].column_names
41
- )
 
 
 
 
 
 
 
42
 
43
  return tokenized_dataset, tokenizer
44
 
 
1
  import torch
2
  from transformers import AutoModelForCausalLM, AutoTokenizer, TrainingArguments, Trainer, DataCollatorForLanguageModeling
3
+ from datasets import load_dataset, Dataset, concatenate_datasets
4
  import numpy as np
5
  from typing import Dict, List
6
  import os
7
+ import json
8
 
9
  def load_and_prepare_data():
10
+ # Load the base customer support dataset
11
+ base_dataset = load_dataset("Victorano/customer-support-1k")
12
+
13
+ # Load custom car service data
14
+ car_service_data = []
15
+ with open('car_service_data.jsonl', 'r') as f:
16
+ for line in f:
17
+ car_service_data.append(json.loads(line))
18
+
19
+ # Convert car service data to the same format as the base dataset
20
+ car_service_dataset = Dataset.from_list(car_service_data)
21
 
22
  # Load tokenizer
23
  tokenizer = AutoTokenizer.from_pretrained("microsoft/DialoGPT-medium")
24
 
25
  # Function to format conversations
26
  def format_conversation(example):
27
+ if 'question' in example and 'answer' in example:
28
+ # Format for base dataset
29
+ conversation = f"Customer: {example['question']}\nSupport: {example['answer']}"
30
+ else:
31
+ # Format for car service data
32
+ conversation = f"Customer: {example['customer_query']}\nSupport: {example['support_response']}"
33
  return {"text": conversation}
34
 
35
+ # Apply formatting to both datasets
36
+ formatted_base_dataset = base_dataset.map(
37
+ format_conversation,
38
+ remove_columns=base_dataset["train"].column_names
39
+ )
40
+
41
+ formatted_car_dataset = car_service_dataset.map(
42
  format_conversation,
43
+ remove_columns=car_service_dataset.column_names
44
  )
45
 
46
+ # Combine datasets
47
+ combined_train = concatenate_datasets([formatted_base_dataset["train"], formatted_car_dataset])
48
+ combined_test = concatenate_datasets([formatted_base_dataset["test"], formatted_car_dataset])
49
+
50
  # Tokenize the dataset
51
  def tokenize_function(examples):
52
  return tokenizer(
 
57
  return_tensors="pt"
58
  )
59
 
60
+ tokenized_dataset = {
61
+ "train": combined_train.map(
62
+ tokenize_function,
63
+ batched=True,
64
+ remove_columns=combined_train.column_names
65
+ ),
66
+ "test": combined_test.map(
67
+ tokenize_function,
68
+ batched=True,
69
+ remove_columns=combined_test.column_names
70
+ )
71
+ }
72
 
73
  return tokenized_dataset, tokenizer
74