Spaces:
Sleeping
Sleeping
Ajit Panday
committed on
Commit
·
4dcbc6c
1
Parent(s):
0680865
Initial commit: Customer Support Chatbot with DialoGPT-medium
Browse files
- car_service_data.jsonl +102 -0
- train.py +43 -13
car_service_data.jsonl
ADDED
|
@@ -0,0 +1,102 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{"input": "I need an oil change", "response": "Sure, when would you like to schedule your oil change?"}
|
| 2 |
+
{"input": "Book a tire rotation for Friday", "response": "I can book your tire rotation for Friday. What time works for you?"}
|
| 3 |
+
{"input": "Can you check my brakes?", "response": "Yes, we can check your brakes. When would you like to come in?"}
|
| 4 |
+
{"input": "I want a car wash tomorrow", "response": "Great, I can schedule a car wash for tomorrow. What time suits you?"}
|
| 5 |
+
{"input": "How much for an oil change?", "response": "An oil change costs $40-$60, depending on your car. Want to book one?"}
|
| 6 |
+
{"input": "Schedule a tune-up next week", "response": "I’ll book a tune-up for next week. Which day and time work best?"}
|
| 7 |
+
{"input": "My car needs new tires", "response": "We can replace your tires. When would you like to bring your car in?"}
|
| 8 |
+
{"input": "Can I get a service today?", "response": "Let me check availability. What service do you need today?"}
|
| 9 |
+
{"input": "I need a battery replacement", "response": "We can replace your battery. When would you like to schedule it?"}
|
| 10 |
+
{"input": "What’s a full service?", "response": "A full service includes oil change, tire check, and more. Want to book one?"}
|
| 11 |
+
{"input": "Fix my AC on Monday", "response": "I’ll schedule an AC repair for Monday. What time works?"}
|
| 12 |
+
{"input": "How much for new brakes?", "response": "Brake replacement is $100-$200. Want to book it?"}
|
| 13 |
+
{"input": "I need an alignment", "response": "We can do an alignment. When would you like to come in?"}
|
| 14 |
+
{"input": "Book a service for Saturday", "response": "What service would you like to book for Saturday?"}
|
| 15 |
+
{"input": "Check my engine light", "response": "We can check your engine light. When are you available?"}
|
| 16 |
+
{"input": "How long for an oil change?", "response": "An oil change takes about 30 minutes. Want to schedule one?"}
|
| 17 |
+
{"input": "I need new wipers", "response": "We can replace your wipers. When would you like to stop by?"}
|
| 18 |
+
{"input": "Can you do a detailing?", "response": "Yes, we offer detailing. When would you like to schedule it?"}
|
| 19 |
+
{"input": "Book a brake job for next Tuesday", "response": "I’ll book your brake job for next Tuesday. What time?"}
|
| 20 |
+
{"input": "What’s included in a tune-up?", "response": "A tune-up includes spark plugs, filters, and more. Want to book one?"}
|
| 21 |
+
{"input": "Replace my spark plugs", "response": "We can replace your spark plugs. When would you like to come in?"}
|
| 22 |
+
{"input": "Book a transmission check for Wednesday", "response": "I’ll book a transmission check for Wednesday. What time?"}
|
| 23 |
+
{"input": "I need a headlight fixed", "response": "We can fix your headlight. When would you like to schedule it?"}
|
| 24 |
+
{"input": "How much for a tire rotation?", "response": "A tire rotation costs $20-$30. Want to book one?"}
|
| 25 |
+
{"input": "Schedule an inspection for this weekend", "response": "I’ll book an inspection for this weekend. Saturday or Sunday?"}
|
| 26 |
+
{"input": "My car’s overheating", "response": "We can check that for you. When can you bring it in?"}
|
| 27 |
+
{"input": "Can you top up my fluids?", "response": "Yes, we can top up your fluids. When would you like to come by?"}
|
| 28 |
+
{"input": "Book an oil change for next Thursday", "response": "I’ll book your oil change for next Thursday. What time?"}
|
| 29 |
+
{"input": "How long for a brake job?", "response": "A brake job takes about 1-2 hours. Want to schedule one?"}
|
| 30 |
+
{"input": "I need a wheel balancing", "response": "We can balance your wheels. When would you like to schedule it?"}
|
| 31 |
+
{"input": "Fix my muffler next Friday", "response": "I’ll schedule a muffler repair for next Friday. What time?"}
|
| 32 |
+
{"input": "How much for a battery replacement?", "response": "A battery replacement costs $80-$120. Want to book it?"}
|
| 33 |
+
{"input": "I need a belt replaced", "response": "We can replace your belt. When would you like to come in?"}
|
| 34 |
+
{"input": "Book a service for Monday morning", "response": "What service would you like to book for Monday morning?"}
|
| 35 |
+
{"input": "Check my tire pressure", "response": "We can check your tire pressure. When are you available?"}
|
| 36 |
+
{"input": "How long for a car wash?", "response": "A car wash takes about 20 minutes. Want to schedule one?"}
|
| 37 |
+
{"input": "I need a new air filter", "response": "We can replace your air filter. When would you like to stop by?"}
|
| 38 |
+
{"input": "Can you do an oil change this afternoon?", "response": "Let me check availability for this afternoon. Any specific time?"}
|
| 39 |
+
{"input": "Book a tire change for next Wednesday", "response": "I’ll book your tire change for next Wednesday. What time?"}
|
| 40 |
+
{"input": "What’s included in an inspection?", "response": "An inspection covers brakes, tires, and more. Want to book one?"}
|
| 41 |
+
{"input": "I need my brakes fixed", "response": "We can fix your brakes. When would you like to schedule it?"}
|
| 42 |
+
{"input": "Book an AC check for tomorrow", "response": "I’ll book an AC check for tomorrow. What time works for you?"}
|
| 43 |
+
{"input": "How much for a wheel alignment?", "response": "A wheel alignment costs $50-$80. Want to book one?"}
|
| 44 |
+
{"input": "Schedule a detailing for Saturday", "response": "I’ll book a detailing for Saturday. What time suits you?"}
|
| 45 |
+
{"input": "My engine’s making a noise", "response": "We can check your engine. When can you bring it in?"}
|
| 46 |
+
{"input": "Can you replace my headlights?", "response": "Yes, we can replace your headlights. When would you like to come by?"}
|
| 47 |
+
{"input": "Book a tune-up for next Monday", "response": "I’ll book your tune-up for next Monday. What time?"}
|
| 48 |
+
{"input": "How long for a tire rotation?", "response": "A tire rotation takes about 30 minutes. Want to schedule one?"}
|
| 49 |
+
{"input": "I need a new battery tomorrow", "response": "We can replace your battery tomorrow. What time works?"}
|
| 50 |
+
{"input": "Can you fix my wipers today?", "response": "Let me check availability for today. Any preferred time?"}
|
| 51 |
+
{"input": "Book an oil change for this Friday", "response": "I’ll book your oil change for this Friday. What time?"}
|
| 52 |
+
{"input": "What does a brake job cost?", "response": "A brake job costs $100-$200. Want to schedule one?"}
|
| 53 |
+
{"input": "I need my tires rotated", "response": "We can rotate your tires. When would you like to come in?"}
|
| 54 |
+
{"input": "Schedule a car wash for next Tuesday", "response": "I’ll book a car wash for next Tuesday. What time?"}
|
| 55 |
+
{"input": "My AC isn’t cooling", "response": "We can check your AC. When can you bring it in?"}
|
| 56 |
+
{"input": "Can you check my fluids?", "response": "Yes, we can check your fluids. When would you like to stop by?"}
|
| 57 |
+
{"input": "Book a brake check for Wednesday", "response": "I’ll book a brake check for Wednesday. What time?"}
|
| 58 |
+
{"input": "How much for a tune-up?", "response": "A tune-up costs $80-$150. Want to book one?"}
|
| 59 |
+
{"input": "I need an oil change next week", "response": "We can schedule your oil change for next week. Which day?"}
|
| 60 |
+
{"input": "Schedule a tire repair for tomorrow", "response": "I’ll book a tire repair for tomorrow. What time suits you?"}
|
| 61 |
+
{"input": "My car needs a check-up", "response": "We can do a full check-up. When would you like to come in?"}
|
| 62 |
+
{"input": "Can you fix my transmission?", "response": "Yes, we can check your transmission. When are you available?"}
|
| 63 |
+
{"input": "Book a detailing for next Thursday", "response": "I’ll book a detailing for next Thursday. What time?"}
|
| 64 |
+
{"input": "How long for a battery replacement?", "response": "A battery replacement takes about 30 minutes. Want to book?"}
|
| 65 |
+
{"input": "I need new spark plugs tomorrow", "response": "We can replace your spark plugs tomorrow. What time?"}
|
| 66 |
+
{"input": "Can you do a wheel alignment today?", "response": "Let me check availability for today. Any preferred time?"}
|
| 67 |
+
{"input": "Book an inspection for next Friday", "response": "I’ll book an inspection for next Friday. What time?"}
|
| 68 |
+
{"input": "What’s the cost of a car wash?", "response": "A car wash costs $15-$25. Want to schedule one?"}
|
| 69 |
+
{"input": "I need my brakes checked tomorrow", "response": "We can check your brakes tomorrow. What time works?"}
|
| 70 |
+
{"input": "Schedule an oil change for Saturday", "response": "I’ll book your oil change for Saturday. What time?"}
|
| 71 |
+
{"input": "My tires are flat", "response": "We can fix your tires. When can you bring your car in?"}
|
| 72 |
+
{"input": "Can you replace my air filter?", "response": "Yes, we can replace your air filter. When would you like to come?"}
|
| 73 |
+
{"input": "Book a tune-up for this Wednesday", "response": "I’ll book a tune-up for this Wednesday. What time?"}
|
| 74 |
+
{"input": "How much for an alignment?", "response": "An alignment costs $50-$80. Want to book one?"}
|
| 75 |
+
{"input": "I need a car wash next Monday", "response": "We can schedule a car wash for next Monday. What time?"}
|
| 76 |
+
{"input": "Schedule a brake job for tomorrow", "response": "I’ll book a brake job for tomorrow. What time suits you?"}
|
| 77 |
+
{"input": "My engine light’s on", "response": "We can check your engine light. When can you bring it in?"}
|
| 78 |
+
{"input": "Can you fix my headlights today?", "response": "Let me check availability for today. Any preferred time?"}
|
| 79 |
+
{"input": "Book an oil change for next Tuesday", "response": "I’ll book your oil change for next Tuesday. What time?"}
|
| 80 |
+
{"input": "How long for a detailing?", "response": "Detailing takes about 2-3 hours. Want to schedule one?"}
|
| 81 |
+
{"input": "I need new wipers next week", "response": "We can replace your wipers next week. Which day works?"}
|
| 82 |
+
{"input": "Can you check my battery?", "response": "Yes, we can check your battery. When would you like to stop by?"}
|
| 83 |
+
{"input": "Book a tire rotation for this Thursday", "response": "I’ll book a tire rotation for this Thursday. What time?"}
|
| 84 |
+
{"input": "What’s the cost of new tires?", "response": "New tires cost $200-$400, depending on size. Want to book?"}
|
| 85 |
+
{"input": "I need an AC repair tomorrow", "response": "We can repair your AC tomorrow. What time works for you?"}
|
| 86 |
+
{"input": "Schedule a check-up for next Wednesday", "response": "I’ll book a check-up for next Wednesday. What time?"}
|
| 87 |
+
{"input": "My car’s making a weird sound", "response": "We can diagnose that for you. When can you bring it in?"}
|
| 88 |
+
{"input": "Can you do a transmission check?", "response": "Yes, we can check your transmission. When are you available?"}
|
| 89 |
+
{"input": "Book a car wash for this Sunday", "response": "I’ll book a car wash for this Sunday. What time suits you?"}
|
| 90 |
+
{"input": "How much for a headlight replacement?", "response": "Headlight replacement costs $30-$50 each. Want to book?"}
|
| 91 |
+
{"input": "I need my oil changed tomorrow", "response": "We can change your oil tomorrow. What time works?"}
|
| 92 |
+
{"input": "Schedule a tire rotation for next Monday", "response": "I’ll book a tire rotation for next Monday. What time?"}
|
| 93 |
+
{"input": "My brakes are squeaking", "response": "We can check your brakes. When can you bring your car in?"}
|
| 94 |
+
{"input": "Can you replace my battery today?", "response": "Let me check availability for today. Any preferred time?"}
|
| 95 |
+
{"input": "Book a detailing for this Friday", "response": "I’ll book a detailing for this Friday. What time?"}
|
| 96 |
+
{"input": "How long for an inspection?", "response": "An inspection takes about 1 hour. Want to schedule one?"}
|
| 97 |
+
{"input": "I need a new muffler", "response": "We can replace your muffler. When would you like to come in?"}
|
| 98 |
+
{"input": "Schedule an oil change for this Tuesday", "response": "I’ll book your oil change for this Tuesday. What time?"}
|
| 99 |
+
{"input": "My car won’t start", "response": "We can check that for you. When can you bring it in?"}
|
| 100 |
+
{"input": "Can you fix my AC this week?", "response": "Yes, we can fix your AC this week. Which day works?"}
|
| 101 |
+
{"input": "Book a brake job for next Thursday", "response": "I’ll book a brake job for next Thursday. What time?"}
|
| 102 |
+
{"input": "How much for a full service?", "response": "A full service costs $150-$250. Want to book one?"}
|
train.py
CHANGED
|
@@ -1,29 +1,52 @@
|
|
| 1 |
import torch
|
| 2 |
from transformers import AutoModelForCausalLM, AutoTokenizer, TrainingArguments, Trainer, DataCollatorForLanguageModeling
|
| 3 |
-
from datasets import load_dataset
|
| 4 |
import numpy as np
|
| 5 |
from typing import Dict, List
|
| 6 |
import os
|
|
|
|
| 7 |
|
| 8 |
def load_and_prepare_data():
|
| 9 |
-
# Load the dataset
|
| 10 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 11 |
|
| 12 |
# Load tokenizer
|
| 13 |
tokenizer = AutoTokenizer.from_pretrained("microsoft/DialoGPT-medium")
|
| 14 |
|
| 15 |
# Function to format conversations
|
| 16 |
def format_conversation(example):
|
| 17 |
-
|
| 18 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
| 19 |
return {"text": conversation}
|
| 20 |
|
| 21 |
-
# Apply formatting to both
|
| 22 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 23 |
format_conversation,
|
| 24 |
-
remove_columns=
|
| 25 |
)
|
| 26 |
|
|
|
|
|
|
|
|
|
|
|
|
|
| 27 |
# Tokenize the dataset
|
| 28 |
def tokenize_function(examples):
|
| 29 |
return tokenizer(
|
|
@@ -34,11 +57,18 @@ def load_and_prepare_data():
|
|
| 34 |
return_tensors="pt"
|
| 35 |
)
|
| 36 |
|
| 37 |
-
tokenized_dataset =
|
| 38 |
-
|
| 39 |
-
|
| 40 |
-
|
| 41 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 42 |
|
| 43 |
return tokenized_dataset, tokenizer
|
| 44 |
|
|
|
|
| 1 |
import torch
|
| 2 |
from transformers import AutoModelForCausalLM, AutoTokenizer, TrainingArguments, Trainer, DataCollatorForLanguageModeling
|
| 3 |
+
from datasets import load_dataset, Dataset, concatenate_datasets
|
| 4 |
import numpy as np
|
| 5 |
from typing import Dict, List
|
| 6 |
import os
|
| 7 |
+
import json
|
| 8 |
|
| 9 |
def load_and_prepare_data():
|
| 10 |
+
# Load the base customer support dataset
|
| 11 |
+
base_dataset = load_dataset("Victorano/customer-support-1k")
|
| 12 |
+
|
| 13 |
+
# Load custom car service data
|
| 14 |
+
car_service_data = []
|
| 15 |
+
with open('car_service_data.jsonl', 'r') as f:
|
| 16 |
+
for line in f:
|
| 17 |
+
car_service_data.append(json.loads(line))
|
| 18 |
+
|
| 19 |
+
# Convert car service data to the same format as the base dataset
|
| 20 |
+
car_service_dataset = Dataset.from_list(car_service_data)
|
| 21 |
|
| 22 |
# Load tokenizer
|
| 23 |
tokenizer = AutoTokenizer.from_pretrained("microsoft/DialoGPT-medium")
|
| 24 |
|
| 25 |
# Function to format conversations
|
| 26 |
def format_conversation(example):
    """Render one record as a two-line "Customer: ... / Support: ..." string.

    Three record schemas are recognised, checked in this order:

    * ``question`` / ``answer`` -- the keys the base dataset branch reads.
    * ``input`` / ``response`` -- the keys used by every record in
      ``car_service_data.jsonl``.
    * ``customer_query`` / ``support_response`` -- the key names this
      function originally expected for car-service data; kept as the
      final fallback so any caller relying on them keeps working.

    Args:
        example: A mapping holding one customer/support exchange.

    Returns:
        A dict with a single ``"text"`` key containing the formatted
        conversation.

    Raises:
        KeyError: If the record matches none of the schemas above.
    """
    if 'question' in example and 'answer' in example:
        # Format for base dataset
        customer_text = example['question']
        support_text = example['answer']
    elif 'input' in example and 'response' in example:
        # Format for car service data (car_service_data.jsonl). Previously
        # these records fell through to the legacy branch and raised
        # KeyError because the file has no 'customer_query' field.
        customer_text = example['input']
        support_text = example['response']
    else:
        # Legacy key names; a KeyError here means the schema is unknown.
        customer_text = example['customer_query']
        support_text = example['support_response']

    conversation = f"Customer: {customer_text}\nSupport: {support_text}"
    return {"text": conversation}
|
| 34 |
|
| 35 |
+
# Apply formatting to both datasets
|
| 36 |
+
formatted_base_dataset = base_dataset.map(
|
| 37 |
+
format_conversation,
|
| 38 |
+
remove_columns=base_dataset["train"].column_names
|
| 39 |
+
)
|
| 40 |
+
|
| 41 |
+
formatted_car_dataset = car_service_dataset.map(
|
| 42 |
format_conversation,
|
| 43 |
+
remove_columns=car_service_dataset.column_names
|
| 44 |
)
|
| 45 |
|
| 46 |
+
# Combine datasets
|
| 47 |
+
combined_train = concatenate_datasets([formatted_base_dataset["train"], formatted_car_dataset])
|
| 48 |
+
combined_test = concatenate_datasets([formatted_base_dataset["test"], formatted_car_dataset])
|
| 49 |
+
|
| 50 |
# Tokenize the dataset
|
| 51 |
def tokenize_function(examples):
|
| 52 |
return tokenizer(
|
|
|
|
| 57 |
return_tensors="pt"
|
| 58 |
)
|
| 59 |
|
| 60 |
+
tokenized_dataset = {
|
| 61 |
+
"train": combined_train.map(
|
| 62 |
+
tokenize_function,
|
| 63 |
+
batched=True,
|
| 64 |
+
remove_columns=combined_train.column_names
|
| 65 |
+
),
|
| 66 |
+
"test": combined_test.map(
|
| 67 |
+
tokenize_function,
|
| 68 |
+
batched=True,
|
| 69 |
+
remove_columns=combined_test.column_names
|
| 70 |
+
)
|
| 71 |
+
}
|
| 72 |
|
| 73 |
return tokenized_dataset, tokenizer
|
| 74 |
|