jusbook / app.py
Balaprime's picture
Update app.py
71dba68 verified
# import sys
# import re
# import json
# import os
# from datasets import Dataset
# from transformers import (
# DistilBertTokenizerFast,
# DistilBertForSequenceClassification,
# Trainer,
# TrainingArguments,
# pipeline
# )
# import gradio as gr
# # --- Training Section ---
# def train_model():
# # Create small custom dataset
# data = {
# "train": [
# # Greet (label 0)
# {"text": "hi", "label": 0}, {"text": "hello", "label": 0}, {"text": "hey there", "label": 0},
# {"text": "good morning", "label": 0}, {"text": "greetings", "label": 0}, {"text": "hi bot", "label": 0},
# {"text": "hello jusbook", "label": 0}, {"text": "hey", "label": 0}, {"text": "start chat", "label": 0},
# {"text": "yo", "label": 0},
# # Services (label 1)
# {"text": "what services do you offer", "label": 1}, {"text": "list services", "label": 1},
# {"text": "what can I book", "label": 1}, {"text": "services available", "label": 1},
# {"text": "tell me about your services", "label": 1}, {"text": "what do you provide", "label": 1},
# {"text": "service list", "label": 1}, {"text": "offerings", "label": 1},
# {"text": "jusbook services", "label": 1}, {"text": "what's offered", "label": 1},
# # Contact (label 2)
# {"text": "contact info", "label": 2}, {"text": "how to reach you", "label": 2},
# {"text": "support contact", "label": 2}, {"text": "email address", "label": 2},
# {"text": "phone number", "label": 2}, {"text": "address", "label": 2},
# {"text": "get in touch", "label": 2}, {"text": "contact details", "label": 2},
# {"text": "support info", "label": 2}, {"text": "how to contact", "label": 2},
# # Available slots (label 3)
# {"text": "available slots", "label": 3}, {"text": "booking times", "label": 3},
# {"text": "what slots are free", "label": 3}, {"text": "show available bookings", "label": 3},
# {"text": "free slots", "label": 3}, {"text": "appointment times", "label": 3},
# {"text": "available appointments", "label": 3}, {"text": "slots for booking", "label": 3},
# {"text": "when can I book", "label": 3}, {"text": "open slots", "label": 3},
# # Book slot (label 4)
# {"text": "book a slot", "label": 4}, {"text": "reserve monday 10am", "label": 4},
# {"text": "book tuesday", "label": 4}, {"text": "schedule appointment", "label": 4},
# {"text": "reserve a time", "label": 4}, {"text": "book now", "label": 4},
# {"text": "make a booking", "label": 4}, {"text": "slot reservation", "label": 4},
# {"text": "book wednesday 9am", "label": 4}, {"text": "reserve slot", "label": 4},
# # Broadcast (label 5)
# {"text": "broadcast slots", "label": 5}, {"text": "announce available slots", "label": 5},
# {"text": "slot broadcast", "label": 5}, {"text": "share slots", "label": 5},
# {"text": "announce bookings", "label": 5}, {"text": "broadcast info", "label": 5},
# {"text": "publicize slots", "label": 5}, {"text": "slot announcement", "label": 5},
# {"text": "tell everyone about slots", "label": 5}, {"text": "broadcast", "label": 5},
# # Upcoming (label 6)
# {"text": "upcoming events", "label": 6}, {"text": "future bookings", "label": 6},
# {"text": "what's coming up", "label": 6}, {"text": "upcoming appointments", "label": 6},
# {"text": "events soon", "label": 6}, {"text": "next events", "label": 6},
# {"text": "upcoming info", "label": 6}, {"text": "future events", "label": 6},
# {"text": "what bookings are upcoming", "label": 6}, {"text": "events list", "label": 6},
# ]
# }
# dataset = Dataset.from_dict({
# "text": [item["text"] for item in data["train"]],
# "label": [item["label"] for item in data["train"]]
# })
# tokenizer = DistilBertTokenizerFast.from_pretrained("distilbert-base-uncased")
# model = DistilBertForSequenceClassification.from_pretrained("distilbert-base-uncased", num_labels=7)
# def tokenize(batch):
# return tokenizer(batch["text"], padding=True, truncation=True, return_tensors="pt")
# dataset = dataset.map(tokenize, batched=True)
# dataset.set_format("torch", columns=["input_ids", "attention_mask", "label"])
# training_args = TrainingArguments(
# output_dir="./results",
# num_train_epochs=3,
# per_device_train_batch_size=8,
# logging_steps=10,
# save_steps=50,
# evaluation_strategy="no",
# learning_rate=5e-5,
# )
# trainer = Trainer(model=model, args=training_args, train_dataset=dataset)
# trainer.train()
# model.save_pretrained("./trained_model")
# tokenizer.save_pretrained("./trained_model")
# # --- Chatbot Class ---
# class Chatbot:
# def __init__(self):
# model_path = os.path.abspath("trained_model")
# self.model = DistilBertForSequenceClassification.from_pretrained(model_path)
# self.tokenizer = DistilBertTokenizerFast.from_pretrained(model_path)
# self.classifier = pipeline("text-classification", model=self.model, tokenizer=self.tokenizer)
# self.intent_map = {
# "LABEL_0": "greet",
# "LABEL_1": "services",
# "LABEL_2": "contact",
# "LABEL_3": "available_slots",
# "LABEL_4": "book_slot",
# "LABEL_5": "broadcast",
# "LABEL_6": "upcoming"
# }
# self.services = ["Legal Consultation", "Document Review", "Appointment Booking", "Virtual Meetings"]
# self.contact_info = {"Email": "support@jusbook.com", "Phone": "+1-123-456-7890", "Address": "123 Justice Lane, Law City, USA"}
# self.available_slots = ["Monday 10AM-12PM", "Tuesday 2PM-4PM", "Wednesday 9AM-11AM"]
# self.upcoming_events = ["Webinar on Legal Rights: Sept 20, 2025", "Free Consultation Day: Oct 1, 2025"]
# self.booked_slots = {}
# def preprocess(self, text):
# return text.lower().strip()
# def get_response(self, user_input):
# processed = self.preprocess(user_input)
# prediction = self.classifier(processed)[0]
# intent = self.intent_map.get(prediction["label"], "unknown")
# if intent == "greet":
# return "Hello! Welcome to jusbook.com. How can I help you today?"
# elif intent == "services":
# return "We offer:\n" + "\n".join(f"- {s}" for s in self.services)
# elif intent == "contact":
# return "Contact:\n" + "\n".join(f"- {k}: {v}" for k, v in self.contact_info.items())
# elif intent == "available_slots":
# return "Available slots:\n" + "\n".join(f"- {s}" for s in self.available_slots)
# elif intent == "book_slot":
# match = re.search(r'(monday|tuesday|wednesday)\s+(\d+am|\d+pm)-(\d+am|\d+pm)', processed)
# if match:
# slot = f"{match.group(1).capitalize()} {match.group(2).upper()}-{match.group(3).upper()}"
# if slot in self.available_slots:
# self.available_slots.remove(slot)
# self.booked_slots["user"] = slot
# return f"Booked: {slot}. Confirmed!"
# return "Slot not available."
# return "Specify slot, e.g., 'book Monday 10AM-12PM'."
# elif intent == "broadcast":
# return "Broadcast: Available slots:\n" + "\n".join(f"- {s}" for s in self.available_slots)
# elif intent == "upcoming":
# return "Upcoming:\n" + "\n".join(f"- {e}" for e in self.upcoming_events)
# else:
# return "Sorry, didn't understand. Ask about services, slots, etc."
# # --- Gradio App ---
# def run_gradio():
# bot = Chatbot()
# with gr.Blocks() as demo:
# gr.Markdown("# πŸ€– JusBook Chatbot")
# chatbot_ui = gr.Chatbot()
# msg = gr.Textbox(placeholder="Type your message here...")
# clear = gr.Button("Clear Chat")
# def respond(message, chat_history):
# response = bot.get_response(message)
# chat_history.append((message, response))
# return "", chat_history
# msg.submit(respond, [msg, chatbot_ui], [msg, chatbot_ui])
# clear.click(lambda: None, None, chatbot_ui, queue=False)
# demo.launch()
# # --- Main Execution ---
# if __name__ == "__main__":
# if len(sys.argv) > 1 and sys.argv[1] == "--train":
# train_model()
# else:
# run_gradio()
# import sys
# import re
# import json
# import os
# from datasets import Dataset
# from transformers import (
# DistilBertTokenizerFast,
# DistilBertForSequenceClassification,
# pipeline
# )
# import gradio as gr
# import torch # Add this import
# # --- Chatbot Class (No Training Here) ---
# class Chatbot:
# def __init__(self):
# # Load from Hub: Replace 'your-username/fine-tuned-jusbook-model' with your pushed model
# # For now, use a base model; fine-tune and push for custom intents
# model_name = "distilbert-base-uncased" # Or your fine-tuned repo
# self.device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
# self.model = DistilBertForSequenceClassification.from_pretrained(
# model_name, num_labels=7
# ).to(self.device)
# self.tokenizer = DistilBertTokenizerFast.from_pretrained(model_name)
# self.model.eval() # Inference mode
# self.classifier = pipeline(
# "text-classification",
# model=self.model,
# tokenizer=self.tokenizer,
# device=0 if self.device.type == 'cuda' else -1 # -1 for CPU
# )
# self.intent_map = {
# "LABEL_0": "greet",
# "LABEL_1": "services",
# "LABEL_2": "contact",
# "LABEL_3": "available_slots",
# "LABEL_4": "book_slot",
# "LABEL_5": "broadcast",
# "LABEL_6": "upcoming"
# }
# self.services = ["Legal Consultation", "Document Review", "Appointment Booking", "Virtual Meetings"]
# self.contact_info = {"Email": "support@jusbook.com", "Phone": "+1-123-456-7890", "Address": "123 Justice Lane, Law City, USA"}
# self.available_slots = ["Monday 10AM-12PM", "Tuesday 2PM-4PM", "Wednesday 9AM-11AM"]
# self.upcoming_events = ["Webinar on Legal Rights: Sept 20, 2025", "Free Consultation Day: Oct 1, 2025"]
# self.booked_slots = {}
# def preprocess(self, text):
# return text.lower().strip()
# def get_response(self, user_input):
# try:
# processed = self.preprocess(user_input)
# prediction = self.classifier(processed)[0]
# intent = self.intent_map.get(prediction["label"], "unknown")
# if intent == "greet":
# return "Hello! Welcome to jusbook.com. How can I help you today?"
# elif intent == "services":
# return "We offer:\n" + "\n".join(f"- {s}" for s in self.services)
# elif intent == "contact":
# return "Contact:\n" + "\n".join(f"- {k}: {v}" for k, v in self.contact_info.items())
# elif intent == "available_slots":
# return "Available slots:\n" + "\n".join(f"- {s}" for s in self.available_slots)
# elif intent == "book_slot":
# match = re.search(r'(monday|tuesday|wednesday)\s+(\d+am|\d+pm)-(\d+am|\d+pm)', processed)
# if match:
# slot = f"{match.group(1).capitalize()} {match.group(2).upper()}-{match.group(3).upper()}"
# if slot in self.available_slots:
# self.available_slots.remove(slot)
# self.booked_slots["user"] = slot
# return f"Booked: {slot}. Confirmed!"
# return "Slot not available."
# return "Specify slot, e.g., 'book Monday 10AM-12PM'."
# elif intent == "broadcast":
# return "Broadcast: Available slots:\n" + "\n".join(f"- {s}" for s in self.available_slots)
# elif intent == "upcoming":
# return "Upcoming:\n" + "\n".join(f"- {e}" for e in self.upcoming_events)
# else:
# return "Sorry, didn't understand. Ask about services, slots, etc."
# except Exception as e:
# print(f"Error in get_response: {e}") # Logs to Space console
# return "Sorry, something went wrong. Try rephrasing!"
# # --- Gradio App ---
# def run_gradio():
# bot = Chatbot() # Pre-load here
# with gr.Blocks() as demo:
# gr.Markdown("# πŸ€– JusBook Chatbot")
# chatbot_ui = gr.Chatbot()
# msg = gr.Textbox(placeholder="Type your message here...")
# clear = gr.Button("Clear Chat")
# def respond(message, chat_history):
# response = bot.get_response(message)
# chat_history.append((message, response))
# return "", chat_history
# msg.submit(respond, [msg, chatbot_ui], [msg, chatbot_ui])
# clear.click(lambda: None, None, chatbot_ui, queue=False)
# demo.queue() # Enable queue for Spaces
# if __name__ == "__main__":
# demo.launch()
# # --- Main Execution ---
# if __name__ == "__main__":
# run_gradio()
# import sys
# import re
# import os
# import torch
# import gradio as gr
# from datasets import Dataset
# from transformers import (
# DistilBertTokenizerFast,
# DistilBertForSequenceClassification,
# Trainer,
# TrainingArguments,
# pipeline
# )
# # --- Training Data ---
# def get_dataset():
# data = {
# "train": [
# # Greet (label 0)
# {"text": "hi", "label": 0}, {"text": "hello", "label": 0}, {"text": "hey there", "label": 0},
# {"text": "good morning", "label": 0}, {"text": "greetings", "label": 0}, {"text": "hi bot", "label": 0},
# {"text": "hello jusbook", "label": 0}, {"text": "hey", "label": 0}, {"text": "start chat", "label": 0},
# {"text": "yo", "label": 0}, {"text": "helo", "label": 0}, {"text": "hii", "label": 0},
# # Services (label 1)
# {"text": "what services do you offer", "label": 1}, {"text": "list services", "label": 1},
# {"text": "what can I book", "label": 1}, {"text": "services available", "label": 1},
# {"text": "tell me about your services", "label": 1}, {"text": "what do you provide", "label": 1},
# {"text": "service list", "label": 1}, {"text": "offerings", "label": 1},
# {"text": "jusbook services", "label": 1}, {"text": "what's offered", "label": 1},
# {"text": "srvices", "label": 1}, {"text": "what u offer", "label": 1},
# # Contact (label 2)
# {"text": "contact info", "label": 2}, {"text": "how to reach you", "label": 2},
# {"text": "support contact", "label": 2}, {"text": "email address", "label": 2},
# {"text": "phone number", "label": 2}, {"text": "address", "label": 2},
# {"text": "get in touch", "label": 2}, {"text": "contact details", "label": 2},
# {"text": "support info", "label": 2}, {"text": "how to contact", "label": 2},
# {"text": "cntact", "label": 2}, {"text": "email?", "label": 2},
# # Available slots (label 3)
# {"text": "available slots", "label": 3}, {"text": "booking times", "label": 3},
# {"text": "what slots are free", "label": 3}, {"text": "show available bookings", "label": 3},
# {"text": "free slots", "label": 3}, {"text": "appointment times", "label": 3},
# {"text": "available appointments", "label": 3}, {"text": "slots for booking", "label": 3},
# {"text": "when can I book", "label": 3}, {"text": "open slots", "label": 3},
# {"text": "free slot?", "label": 3}, {"text": "slots plz", "label": 3},
# # Book slot (label 4)
# {"text": "book a slot", "label": 4}, {"text": "reserve monday 10am-12pm", "label": 4},
# {"text": "book tuesday", "label": 4}, {"text": "schedule appointment", "label": 4},
# {"text": "reserve a time", "label": 4}, {"text": "book now", "label": 4},
# {"text": "make a booking", "label": 4}, {"text": "slot reservation", "label": 4},
# {"text": "book wednesday 9am-11am", "label": 4}, {"text": "reserve slot", "label": 4},
# {"text": "book mon 10am", "label": 4}, {"text": "reserve tues 2pm", "label": 4},
# # Broadcast (label 5)
# {"text": "broadcast slots", "label": 5}, {"text": "announce available slots", "label": 5},
# {"text": "slot broadcast", "label": 5}, {"text": "share slots", "label": 5},
# {"text": "announce bookings", "label": 5}, {"text": "broadcast info", "label": 5},
# {"text": "publicize slots", "label": 5}, {"text": "slot announcement", "label": 5},
# {"text": "tell everyone about slots", "label": 5}, {"text": "broadcast", "label": 5},
# {"text": "announce slot", "label": 5}, {"text": "share slot plz", "label": 5},
# # Upcoming (label 6)
# {"text": "upcoming events", "label": 6}, {"text": "future bookings", "label": 6},
# {"text": "what's coming up", "label": 6}, {"text": "upcoming appointments", "label": 6},
# {"text": "events soon", "label": 6}, {"text": "next events", "label": 6},
# {"text": "upcoming info", "label": 6}, {"text": "future events", "label": 6},
# {"text": "what bookings are upcoming", "label": 6}, {"text": "events list", "label": 6},
# {"text": "whats next", "label": 6}, {"text": "upcoming plz", "label": 6}
# ]
# }
# return Dataset.from_dict({
# "text": [item["text"] for item in data["train"]],
# "label": [item["label"] for item in data["train"]]
# })
# # --- Model Training ---
# def train_model():
# dataset = get_dataset()
# tokenizer = DistilBertTokenizerFast.from_pretrained("distilbert-base-uncased")
# model = DistilBertForSequenceClassification.from_pretrained("distilbert-base-uncased", num_labels=7)
# def tokenize(batch):
# return tokenizer(batch["text"], padding=True, truncation=True, return_tensors="pt")
# dataset = dataset.map(tokenize, batched=True)
# dataset.set_format("torch", columns=["input_ids", "attention_mask", "label"])
# training_args = TrainingArguments(
# output_dir="./results",
# num_train_epochs=3,
# per_device_train_batch_size=8,
# logging_steps=10,
# save_steps=50,
# evaluation_strategy="no",
# learning_rate=5e-5,
# )
# trainer = Trainer(model=model, args=training_args, train_dataset=dataset)
# trainer.train()
# model.save_pretrained("./trained_model")
# tokenizer.save_pretrained("./trained_model")
# print("Model trained and saved to ./trained_model")
# return "βœ… Model training completed! Saved to ./trained_model"
# # --- Model Loading ---
# def load_model():
# model_path = "./trained_model" # Use fine-tuned model if available
# if not os.path.exists(model_path):
# model_path = "distilbert-base-uncased" # Fallback to base model
# print(f"⚠️ Using base model (no trained model found at {model_path})")
# device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
# model = DistilBertForSequenceClassification.from_pretrained(model_path, num_labels=7).to(device)
# tokenizer = DistilBertTokenizerFast.from_pretrained(model_path)
# model.eval()
# classifier = pipeline(
# "text-classification",
# model=model,
# tokenizer=tokenizer,
# device=0 if device.type == 'cuda' else -1
# )
# return classifier, device
# # --- Rule-Based Fallback ---
# def rule_based_classify(text):
# text_lower = text.lower()
# if any(word in text_lower for word in ["hi", "hello", "hey", "greet", "helo", "hii"]):
# return 0 # greet
# elif any(word in text_lower for word in ["service", "offer", "book", "list", "srvices"]):
# return 1 # services
# elif any(word in text_lower for word in ["contact", "email", "phone", "address", "cntact"]):
# return 2 # contact
# elif any(word in text_lower for word in ["slot", "available", "time", "appointment"]):
# return 3 # available_slots
# elif "book" in text_lower or "reserve" in text_lower:
# return 4 # book_slot
# elif any(word in text_lower for word in ["broadcast", "announce", "share"]):
# return 5 # broadcast
# elif any(word in text_lower for word in ["upcoming", "event", "future", "next"]):
# return 6 # upcoming
# return -1 # unknown
# # --- Intent Classification ---
# def classify_intent(text):
# try:
# classifier, _ = load_model()
# with torch.no_grad():
# prediction = classifier(text)[0]
# label = int(prediction["label"].split("_")[1])
# return label
# except Exception as e:
# print(f"Model error: {e}, using rule-based fallback")
# return rule_based_classify(text) # Fallback
# # --- Response Logic ---
# class ChatbotResponses:
# def __init__(self):
# self.intent_map = {
# 0: "greet", 1: "services", 2: "contact", 3: "available_slots",
# 4: "book_slot", 5: "broadcast", 6: "upcoming"
# }
# self.services = ["Legal Consultation", "Document Review", "Appointment Booking", "Virtual Meetings"]
# self.contact_info = {"Email": "support@jusbook.com", "Phone": "+1-123-456-7890", "Address": "123 Justice Lane, Law City, USA"}
# self.available_slots = ["Monday 10AM-12PM", "Tuesday 2PM-4PM", "Wednesday 9AM-11AM"]
# self.upcoming_events = ["Webinar on Legal Rights: Sept 20, 2025", "Free Consultation Day: Oct 1, 2025"]
# self.booked_slots = {}
# def get_response(self, user_input, intent_label):
# intent = self.intent_map.get(intent_label, "unknown")
# processed = user_input.lower().strip()
# if intent == "greet":
# return "Hello! Welcome to jusbook.com. How can I help you today?"
# elif intent == "services":
# return "We offer:\n" + "\n".join(f"- {s}" for s in self.services)
# elif intent == "contact":
# return "Contact:\n" + "\n".join(f"- {k}: {v}" for k, v in self.contact_info.items())
# elif intent == "available_slots":
# return "Available slots:\n" + "\n".join(f"- {s}" for s in self.available_slots)
# elif intent == "broadcast":
# return "πŸ”Š **Broadcast Message**: Available slots:\n" + "\n".join(f"- {s}" for s in self.available_slots)
# elif intent == "upcoming":
# return "Upcoming Events:\n" + "\n".join(f"- {e}" for e in self.upcoming_events)
# elif intent == "book_slot":
# pattern = r'(monday|tuesday|wednesday)\s+(\d+am?|\d+pm?)-(\d+am?|\d+pm?)'
# match = re.search(pattern, processed, re.IGNORECASE)
# if match:
# day = match.group(1).capitalize()
# start = match.group(2).upper()
# end = match.group(3).upper()
# slot = f"{day} {start}-{end}"
# if slot in self.available_slots:
# self.available_slots.remove(slot)
# self.booked_slots["user"] = slot
# return f"βœ… **Booking Confirmed!**\n\n**Slot**: {slot}\n**Status**: Reserved for you!"
# return f"❌ **Slot Unavailable**\n\nThe requested slot '{slot}' is no longer available."
# # Show available slots and prompt for specific booking
# return "Available slots:\n" + "\n".join(f"- {s}" for s in self.available_slots) + "\n\n**To book**: Type 'book [day] [time]', e.g., 'book Monday 10AM-12PM'"
# else:
# return "Sorry, I didn't understand that. You can ask me about:\n- Services\n- Contact info\n- Available slots\n- Booking a slot\n- Upcoming events\n\nTry something like 'hello', 'services', or 'book Monday 10AM-12PM'"
# # Global chatbot instance
# bot = ChatbotResponses()
# # --- Testing Function ---
# def test_chatbot():
# """Run comprehensive tests for the chatbot"""
# test_cases = [
# ("hello", 0, "Hello! Welcome to jusbook.com"),
# ("what services", 1, "We offer:\n- Legal Consultation"),
# ("contact info", 2, "Contact:\n- Email: support@jusbook.com"),
# ("available slots", 3, "Available slots:\n- Monday 10AM-12PM"),
# ("book monday 10am-12pm", 4, "Booking Confirmed"),
# ("broadcast", 5, "Broadcast Message"),
# ("upcoming events", 6, "Upcoming Events"),
# ("random nonsense", -1, "Sorry, I didn't understand")
# ]
# results = []
# for input_text, expected_intent, expected_response_contains in test_cases:
# try:
# intent = classify_intent(input_text)
# response = bot.get_response(input_text, intent)
# intent_pass = "βœ…" if intent == expected_intent else "❌"
# response_pass = "βœ…" if expected_response_contains in response else "❌"
# results.append({
# "Input": input_text,
# "Expected Intent": expected_intent,
# "Got Intent": intent,
# "Intent Pass": intent_pass,
# "Response": response[:100] + "..." if len(response) > 100 else response,
# "Response Pass": response_pass
# })
# except Exception as e:
# results.append({
# "Input": input_text,
# "Error": str(e),
# "Status": "❌ Failed"
# })
# # Print results
# print("\n" + "="*80)
# print("πŸ§ͺ CHATBOT TEST RESULTS")
# print("="*80)
# all_passed = True
# for result in results:
# if "Error" in result:
# print(f"❌ {result['Input']}: {result['Error']}")
# all_passed = False
# else:
# intent_status = result["Intent Pass"]
# response_status = result["Response Pass"]
# status = "βœ…" if intent_status == "βœ…" and response_status == "βœ…" else "⚠️"
# print(f"{status} '{result['Input']}' β†’ Intent: {result['Got Intent']} ({intent_status}) | Response: {response_status}")
# if intent_status != "βœ…" or response_status != "βœ…":
# all_passed = False
# if all_passed:
# print("\nπŸŽ‰ All tests passed! The chatbot is ready to use.")
# else:
# print("\n⚠️ Some tests failed. Check the results above.")
# return results
# --- Gradio Chat Function ---
# def chat_response(message, history):
# """Generate chatbot response for Gradio interface"""
# if not message.strip():
# return history, ""
# try:
# # Classify intent
# intent = classify_intent(message)
# # Generate response
# response = bot.get_response(message, intent)
# # Add to history
# history.append([message, response])
# return history, ""
# except Exception as e:
# error_msg = f"Sorry, something went wrong: {str(e)}"
# history.append([message, error_msg])
# return history, ""
# # --- Gradio Interface ---
# def create_gradio_interface():
# """Create the main Gradio interface"""
# with gr.Blocks(title="JusBook Chatbot", theme=gr.themes.Soft()) as demo:
# gr.Markdown(
# """
# # πŸ€– **JusBook Chatbot**
# *Your legal appointment assistant*
# Ask me about:
# - Available services
# - Contact information
# - Booking slots
# - Upcoming events
# **Try:** "hello", "services", "available slots", or "book Monday 10AM-12PM"
# """
# )
# # Chat interface
# chatbot = gr.Chatbot(
# height=500,
# show_label=False,
# avatar_images=("user-avatar.png", "bot-avatar.png")
# )
# msg = gr.Textbox(
# placeholder="Type your message here...",
# container=True,
# label="Your Message",
# scale=7
# )
# with gr.Row():
# submit_btn = gr.Button("Send", variant="primary", scale=1)
# clear_btn = gr.Button("Clear Chat", scale=1)
# test_btn = gr.Button("Run Tests", scale=1)
# # Status messages
# status = gr.Textbox(label="Status", interactive=False)
# # Event handlers
# def submit_message(message, history):
# return chat_response(message, history)
# def clear_chat():
# return [], ""
# def run_tests():
# results = test_chatbot()
# # Convert results to chat format for display
# test_history = []
# for result in results[:5]: # Show first 5 tests
# if "Error" not in result:
# test_msg = f"**Test:** {result['Input']}\n**Intent:** {result['Got Intent']} ({result['Intent Pass']})\n**Response:** {result['Response'][:50]}..."
# test_history.append([test_msg, ""])
# return test_history, "πŸ§ͺ Test results displayed above. Check console for full output."
# msg.submit(submit_message, [msg, chatbot], [chatbot, msg])
# submit_btn.click(submit_message, [msg, chatbot], [chatbot, msg])
# clear_btn.click(clear_chat, None, [chatbot, msg])
# test_btn.click(run_tests, None, [chatbot, status])
# # Add examples
# gr.Examples(
# examples=[
# ["hello"],
# ["what services do you offer?"],
# ["show me available slots"],
# ["book Monday 10AM-12PM"],
# ["contact information"],
# ["upcoming events"],
# ["broadcast available slots"]
# ],
# inputs=msg,
# label="Quick Examples"
# )
# return demo
# # --- Training Interface ---
# def create_training_interface():
# """Create Gradio interface for model training"""
# with gr.Blocks(title="Model Training") as demo:
# gr.Markdown("# πŸ”„ **Model Training Dashboard**")
# gr.Markdown("Train your custom DistilBERT model for the JusBook chatbot")
# with gr.Row():
# train_btn = gr.Button("πŸš€ Start Training", variant="primary", size="lg")
# status = gr.Textbox(label="Training Status", interactive=False, lines=3)
# # Training output
# training_log = gr.Textbox(label="Training Log", lines=10, interactive=False)
# def run_training():
# try:
# log_content = []
# def log_callback(msg):
# log_content.append(msg)
# return "\n".join(log_content)
# print("Starting model training...")
# status_msg = train_model()
# log_content.append(status_msg)
# training_log.update(value="\n".join(log_content))
# status.update(value="βœ… Training completed successfully!")
# return log_content, "βœ… Training completed successfully!"
# except Exception as e:
# error_msg = f"❌ Training failed: {str(e)}"
# status.update(value=error_msg)
# return [error_msg], error_msg
# train_btn.click(
# run_training,
# outputs=[training_log, status]
# )
# gr.Markdown("### πŸ“Š Dataset Info")
# gr.Markdown(f"""
# **Training Examples:** {len(get_dataset())}
# **Intents:** 7 (Greet, Services, Contact, Slots, Booking, Broadcast, Upcoming)
# **Model:** DistilBERT-base (~66M parameters)
# **Epochs:** 3
# """)
# return demo
# # --- Main Execution ---
# def main():
# if len(sys.argv) > 1:
# if sys.argv[1] == "--train":
# print("πŸ”„ Starting model training...")
# train_model()
# elif sys.argv[1] == "--test":
# print("πŸ§ͺ Running chatbot tests...")
# test_chatbot()
# else:
# print("❓ Unknown command. Use --train or --test")
# sys.exit(1)
# else:
# # Launch Gradio interface
# demo = create_gradio_interface()
# demo.launch(
# share=True, # For public sharing
# server_name="0.0.0.0",
# server_port=7860,
# show_error=True
# )
# if __name__ == "__main__":
# main()
import sys
import re
import os
import torch
import gradio as gr
from datasets import Dataset
from transformers import (
DistilBertTokenizerFast,
DistilBertForSequenceClassification,
Trainer,
TrainingArguments,
pipeline
)
# --- Training Data --- (unchanged, your full dataset here)
def get_dataset():
    """Build the in-memory training set for the intent classifier.

    The example phrases are grouped by integer intent label:
    0 = greet, 1 = services, 2 = contact, 3 = available slots,
    4 = book slot, 5 = broadcast, 6 = upcoming.

    Returns:
        The result of ``Dataset.from_dict`` on two parallel columns,
        ``"text"`` (the phrase) and ``"label"`` (its intent id), ordered
        by label and, within a label, in the order listed below.
    """
    phrases_by_label = (
        # Greet
        (0, [
            "hi", "hello", "hey there",
            "good morning", "greetings", "hi bot",
            "hello jusbook", "hey", "start chat",
            "yo", "helo", "hii",
        ]),
        # Services
        (1, [
            "what services do you offer", "list services",
            "what can I book", "services available",
            "tell me about your services", "what do you provide",
            "service list", "offerings",
            "jusbook services", "what's offered",
            "srvices", "what u offer",
        ]),
        # Contact
        (2, [
            "contact info", "how to reach you",
            "support contact", "email address",
            "phone number", "address",
            "get in touch", "contact details",
            "support info", "how to contact",
            "cntact", "email?",
        ]),
        # Available slots
        (3, [
            "available slots", "booking times",
            "what slots are free", "show available bookings",
            "free slots", "appointment times",
            "available appointments", "slots for booking",
            "when can I book", "open slots",
            "free slot?", "slots plz",
        ]),
        # Book slot
        (4, [
            "book a slot", "reserve monday 10am-12pm",
            "book tuesday", "schedule appointment",
            "reserve a time", "book now",
            "make a booking", "slot reservation",
            "book wednesday 9am-11am", "reserve slot",
            "book mon 10am", "reserve tues 2pm",
        ]),
        # Broadcast
        (5, [
            "broadcast slots", "announce available slots",
            "slot broadcast", "share slots",
            "announce bookings", "broadcast info",
            "publicize slots", "slot announcement",
            "tell everyone about slots", "broadcast",
            "announce slot", "share slot plz",
        ]),
        # Upcoming
        (6, [
            "upcoming events", "future bookings",
            "what's coming up", "upcoming appointments",
            "events soon", "next events",
            "upcoming info", "future events",
            "what bookings are upcoming", "events list",
            "whats next", "upcoming plz",
        ]),
    )

    # Flatten the grouped table into two parallel columns.
    texts = []
    labels = []
    for intent_id, phrases in phrases_by_label:
        texts.extend(phrases)
        labels.extend([intent_id] * len(phrases))

    return Dataset.from_dict({"text": texts, "label": labels})
# --- Model Training --- (unchanged)
def train_model():
    """Fine-tune DistilBERT on the built-in intent dataset and save it.

    Loads ``distilbert-base-uncased`` with a 7-way classification head,
    tokenizes the examples returned by ``get_dataset()``, trains for three
    epochs, and writes the model and tokenizer to ``./trained_model``
    (Trainer output goes to ``./results``).

    Returns:
        A human-readable status string reporting that training completed.
    """
    dataset = get_dataset()
    tokenizer = DistilBertTokenizerFast.from_pretrained("distilbert-base-uncased")
    model = DistilBertForSequenceClassification.from_pretrained(
        "distilbert-base-uncased", num_labels=7
    )

    def tokenize(batch):
        # Return plain lists; set_format("torch", ...) below handles the
        # tensor conversion, so return_tensors="pt" is not needed here.
        return tokenizer(batch["text"], padding=True, truncation=True)

    dataset = dataset.map(tokenize, batched=True)
    dataset.set_format("torch", columns=["input_ids", "attention_mask", "label"])

    # No evaluation-strategy argument is passed: "no" is already the default,
    # and the old `evaluation_strategy` keyword was renamed to `eval_strategy`
    # in newer transformers releases, where passing the old name raises
    # TypeError. Omitting it behaves the same on both old and new versions.
    training_args = TrainingArguments(
        output_dir="./results",
        num_train_epochs=3,
        per_device_train_batch_size=8,
        logging_steps=10,
        save_steps=50,
        learning_rate=5e-5,
    )
    trainer = Trainer(model=model, args=training_args, train_dataset=dataset)
    trainer.train()

    # Save both pieces so load_model() can restore them from one directory.
    model.save_pretrained("./trained_model")
    tokenizer.save_pretrained("./trained_model")
    print("Model trained and saved to ./trained_model")
    return "βœ… Model training completed! Saved to ./trained_model"
# --- Model Loading --- (unchanged)
# Classifier pipelines already built in this process, keyed by model path, so
# that the weights are not re-read from disk for every chat message.
_LOADED_CLASSIFIERS = {}


def load_model():
    """Return a text-classification pipeline and the device it runs on.

    Uses the fine-tuned weights in ``./trained_model`` when that directory
    exists, otherwise falls back to the base ``distilbert-base-uncased``
    checkpoint. The result is cached per model path; a model that is
    re-trained into the same directory while the process is running is NOT
    picked up until restart.

    Returns:
        tuple: ``(classifier, device)`` where ``classifier`` is the
        transformers pipeline and ``device`` is the ``torch.device`` in use.
    """
    model_path = "./trained_model"
    if not os.path.exists(model_path):
        model_path = "distilbert-base-uncased"
        print("⚠️ Using base model (train first for better accuracy!)")

    cached = _LOADED_CLASSIFIERS.get(model_path)
    if cached is not None:
        return cached

    device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
    model = DistilBertForSequenceClassification.from_pretrained(model_path, num_labels=7).to(device)
    tokenizer = DistilBertTokenizerFast.from_pretrained(model_path)
    model.eval()
    classifier = pipeline(
        "text-classification",
        model=model,
        tokenizer=tokenizer,
        # pipeline() takes a CUDA device index, or -1 for CPU
        device=0 if device.type == 'cuda' else -1,
    )

    _LOADED_CLASSIFIERS[model_path] = (classifier, device)
    return classifier, device
# --- FIXED Rule-Based Fallback --- (Better keywords, first priority)
# Keyword rules, checked in order; the first rule that matches wins.
# Each entry is (intent label, intent name, regex alternatives). Alternatives
# are anchored at the START of a word, so "book" matches "booked"/"bookings"
# but not "jusbook". "hi" additionally requires a whole word so that
# "history"/"hire" are not greetings, while "hii" still covers "hiii...".
_INTENT_RULES = (
    (0, "greet", r"hi\b|hello|hey|greet|helo|hii|good morning"),
    (1, "services", r"service|offer|list|srvices|what can i"),
    (2, "contact", r"contact|email|phone|address|cntact|support"),
    (3, "available_slots", r"available|free slot|open slot|when can i book"),
    (4, "book_slot", r"book|reserve|schedule|booked slots|my booking|slots booked"),
    (5, "broadcast", r"broadcast|announce|share"),
    (6, "upcoming", r"upcoming|event|future|next|show my bookings"),
)


def rule_based_classify(text):
    """Classify *text* into an intent label using keyword rules.

    Matching is case-insensitive and anchored at word starts, which avoids
    the false positives of plain substring search (e.g. "this" or "which"
    being taken for the greeting "hi").

    Args:
        text: The raw user message.

    Returns:
        int: The intent label 0-6 of the first matching rule, or -1 when no
        rule matches.
    """
    import re  # local import keeps this block self-contained

    text_lower = text.lower()
    print(f"DEBUG: Rule classify input: '{text_lower}'")  # For debugging
    for label, name, alternatives in _INTENT_RULES:
        if re.search(r"\b(?:" + alternatives + ")", text_lower):
            print(f"DEBUG: Matched {name}")
            return label
    print("DEBUG: No match, unknown")
    return -1
def classify_intent(text):
    """Return the intent label for *text*.

    Keyword rules are tried first because they work without a trained model.
    Only when no rule matches is the transformer classifier consulted.

    Args:
        text: The raw user message.

    Returns:
        int: An intent label, or -1 when the rules find nothing and the
        model cannot be used.
    """
    # Try rules first (reliable without training)
    rule_intent = rule_based_classify(text)
    if rule_intent != -1:
        print(f"DEBUG: Using rule-based intent: {rule_intent}")
        return rule_intent

    # Fallback to model
    try:
        classifier, _ = load_model()
        with torch.no_grad():
            prediction = classifier(text)[0]
        label_str = prediction["label"]
        # Labels look like "LABEL_3" (or a bare "3"); keep the trailing number.
        label = int(label_str.rsplit("_", 1)[-1])
        print(f"DEBUG: Model predicted: {label_str} (score: {prediction['score']:.2f})")
        return label
    except Exception as e:
        # The rules already ran and found nothing, so there is no point in
        # running them a second time: report the intent as unknown.
        print(f"DEBUG: Model error: {e}, intent unknown")
        return rule_intent
# --- FIXED Response Logic --- (Better booking handling, show booked slots)
class ChatbotResponses:
    """Canned-response engine for the JusBook chatbot.

    Holds the in-memory catalogue (services, contact details, open slots,
    upcoming events) together with the bookings made during the lifetime of
    the process, and turns a classified intent into the reply text.
    """

    # Booking grammar: "<verb> <day> <start>-<end>", e.g. "book monday 10am-12pm".
    # "reserve" and "schedule" are accepted as well as "book", spaces around
    # the hyphen are tolerated, and times need a full am/pm suffix.
    _BOOKING_PATTERN = (
        r"\b(?:book|reserve|schedule)\s+(monday|tuesday|wednesday)\s+"
        r"(\d{1,2}(?:am|pm))\s*-\s*(\d{1,2}(?:am|pm))\b"
    )
    # Phrases that mean "show me what I have booked" rather than "book a slot".
    _BOOKED_QUERY_HINTS = ("booked", "my booking", "slots booked")

    def __init__(self):
        self.intent_map = {
            0: "greet", 1: "services", 2: "contact", 3: "available_slots",
            4: "book_slot", 5: "broadcast", 6: "upcoming"
        }
        self.services = ["Legal Consultation", "Document Review", "Appointment Booking", "Virtual Meetings"]
        self.contact_info = {"Email": "support@jusbook.com", "Phone": "+1-123-456-7890", "Address": "123 Justice Lane, Law City, USA"}
        self.available_slots = ["Monday 10AM-12PM", "Tuesday 2PM-4PM", "Wednesday 9AM-11AM"]
        self.upcoming_events = ["Webinar on Legal Rights: Sept 20, 2025", "Free Consultation Day: Oct 1, 2025"]
        # user -> list of booked slots, e.g. {"user": ["Monday 10AM-12PM"]}.
        # A list is kept so that a second booking does not overwrite (and
        # thereby lose) the first one.
        self.booked_slots = {}

    @staticmethod
    def _bullets(items):
        """Render *items* as a newline-separated "- item" list."""
        return "\n".join(f"- {item}" for item in items)

    def _booked_lines(self):
        """Render every booking as "- user: slot"; empty string if there are none."""
        return "\n".join(
            f"- {user}: {slot}"
            for user, slots in self.booked_slots.items()
            for slot in slots
        )

    def _book_slot_response(self, processed):
        """Handle the book_slot intent for the lower-cased, stripped message."""
        match = re.search(self._BOOKING_PATTERN, processed)
        if match:
            day, start, end = match.groups()
            # Normalise to the spelling used in available_slots
            slot = f"{day.capitalize()} {start.upper()}-{end.upper()}"
            if slot in self.available_slots:
                self.available_slots.remove(slot)
                self.booked_slots.setdefault("user", []).append(slot)  # Simple user tracking
                return f"✅ **Booking Confirmed!**\n\n**Slot**: {slot}\n**Status**: Reserved for you!"
            return f"❌ **Slot Unavailable**\n\nThe requested slot '{slot}' is no longer available."

        open_slots = self._bullets(self.available_slots)
        # No concrete slot requested: is the user asking about existing bookings?
        if any(hint in processed for hint in self._BOOKED_QUERY_HINTS):
            booked_info = self._booked_lines()
            if booked_info:
                return f"Your Booked Slots:\n{booked_info}\n\nAvailable:\n{open_slots}"
            return "No slots booked yet. Available:\n" + open_slots

        # Default prompt
        return (
            "Available slots:\n" + open_slots
            + "\n\n**To book**: Type 'book [day] [time]', e.g., 'book Monday 10AM-12PM'"
        )

    def get_response(self, user_input, intent_label):
        """Build the reply for *user_input* given its classified intent.

        Args:
            user_input: The raw user message.
            intent_label: Integer intent label; anything not present in
                ``intent_map`` is treated as "unknown".

        Returns:
            str: The reply text. Booking requests may also mutate
            ``available_slots`` and ``booked_slots``.
        """
        intent = self.intent_map.get(intent_label, "unknown")
        processed = user_input.lower().strip()
        print(f"DEBUG: Intent: {intent}, Input: {processed}")  # Debug

        if intent == "greet":
            return "Hello! Welcome to jusbook.com. How can I help you today?"
        if intent == "services":
            return "We offer:\n" + self._bullets(self.services)
        if intent == "contact":
            return "Contact:\n" + "\n".join(f"- {k}: {v}" for k, v in self.contact_info.items())
        if intent == "available_slots":
            return "Available slots:\n" + self._bullets(self.available_slots)
        if intent == "broadcast":
            return "🔊 **Broadcast Message**: Available slots:\n" + self._bullets(self.available_slots)
        if intent == "upcoming":
            booked_info = self._booked_lines() or "No upcoming bookings yet."
            return (
                "Upcoming Events:\n" + self._bullets(self.upcoming_events)
                + f"\n\nYour Bookings:\n{booked_info}"
            )
        if intent == "book_slot":
            return self._book_slot_response(processed)
        return "Sorry, I didn't understand that. You can ask me about:\n- Services\n- Contact info\n- Available slots\n- Booking a slot (e.g., 'book Monday 10AM-12PM')\n- Upcoming events\n- Your booked slots"


# Global instance shared by the chat handlers
bot = ChatbotResponses()
# --- UPDATED Testing Function --- (Added your failing cases)
def test_chatbot():
    """Run the built-in smoke tests and return one result row per case.

    The cases run against a fresh ``ChatbotResponses`` instance rather than
    the shared global bot, so running the tests neither consumes the slots
    offered to real users nor fails on a second run. Cases are ordered so
    that the "nothing booked yet" check runs before the booking itself.

    Returns:
        list[dict]: Rows with the keys "Input", "Expected Intent",
        "Got Intent", "Intent Pass", "Response" and "Response Pass".
    """
    # The marker strings are compared verbatim by the Gradio "Run Tests"
    # handler, so they are deliberately left exactly as they were.
    pass_mark = "βœ…"
    fail_mark = "❌"

    test_cases = [
        ("hello", 0, "Hello! Welcome"),
        ("contact info", 2, "Contact:\n- Email"),
        ("available slots", 3, "Available slots"),
        ("booked slots", 4, "No slots booked yet"),  # before anything is booked
        ("book monday 10am-12pm", 4, "Booking Confirmed"),
        ("slots booked", 4, "Your Booked Slots"),  # after the booking above
        ("broadcast", 5, "Broadcast Message"),
        ("upcoming events", 6, "Upcoming Events"),
        ("random text", -1, "Sorry, I didn't understand"),
    ]

    test_bot = ChatbotResponses()  # isolated state for the test run
    results = []
    for input_text, expected_intent, expected_response_contains in test_cases:
        intent = classify_intent(input_text)
        response = test_bot.get_response(input_text, intent)
        intent_pass = pass_mark if intent == expected_intent else fail_mark
        response_pass = pass_mark if expected_response_contains in response else fail_mark
        results.append({
            "Input": input_text,
            "Expected Intent": expected_intent,
            "Got Intent": intent,
            "Intent Pass": intent_pass,
            "Response": response[:100] + "..." if len(response) > 100 else response,
            "Response Pass": response_pass
        })
        print(f"Test '{input_text}': Intent {intent} ({intent_pass}), Response has '{expected_response_contains}' ({response_pass})")

    all_passed = all(
        r["Intent Pass"] == pass_mark and r["Response Pass"] == pass_mark for r in results
    )
    if all_passed:
        print("\nπŸŽ‰ All tests passed!")
    else:
        print("\nSome tests failed - check above")
    return results
# --- Gradio Chat Function --- (unchanged, but with debug prints)
def chat_response(message, history):
    """Gradio callback: answer *message* and append the exchange to *history*.

    Args:
        message: Text typed by the user; ``None`` or blank input is ignored.
        history: List of ``[user_message, bot_reply]`` pairs, or ``None`` when
            the chat has no history yet.

    Returns:
        tuple: ``(history, "")`` - the updated history and an empty string
        that clears the input textbox.
    """
    history = [] if history is None else history
    if not message or not message.strip():
        return history, ""

    print(f"\n--- New Message: {message} ---")  # Debug separator
    try:
        intent = classify_intent(message)
        response = bot.get_response(message, intent)
    except Exception as e:
        # UI boundary: show the failure in the chat instead of crashing the app
        response = f"Sorry, something went wrong: {str(e)}"
    else:
        print(f"Response: {response[:50]}...")  # Debug

    history.append([message, response])
    return history, ""
# --- Gradio Interface --- (Added debug console link)
def create_gradio_interface():
    """Assemble the Gradio Blocks UI for the chatbot and return it unlaunched.

    The layout is: two Markdown headers, the chat window, the message box, a
    row with the Send / Clear Chat / Run Tests buttons, a read-only status
    box and a list of clickable example prompts.
    """
    sample_prompts = [
        ["hello"], ["contact info"], ["available slots"], ["book Monday 10AM-12PM"],
        ["slots booked"], ["booked slots"], ["upcoming events"],
    ]

    with gr.Blocks(title="JusBook Chatbot", theme=gr.themes.Soft()) as demo:
        gr.Markdown("# πŸ€– **JusBook Chatbot** (Debug Mode)")
        gr.Markdown("*Check browser console for DEBUG logs*")
        chat_window = gr.Chatbot(height=500)
        message_box = gr.Textbox(placeholder="Type your message here...", label="Your Message")
        with gr.Row():
            send_button = gr.Button("Send", variant="primary")
            clear_button = gr.Button("Clear Chat")
            tests_button = gr.Button("Run Tests")
        status_box = gr.Textbox(label="Status", interactive=False)

        def submit_message(message, history):
            # Thin wrapper so both the Enter key and the Send button share one handler
            return chat_response(message, history)

        def clear_chat():
            # Empty history, empty textbox
            return [], ""

        def run_tests():
            # Show one chat row per test case and a summary in the status box
            results = test_chatbot()
            rows = []
            everything_ok = True
            for r in results:
                marks = r['Intent Pass'] + r['Response Pass']
                rows.append([r["Input"], f"Intent: {r['Got Intent']} | Pass: {marks}"])
                if not (r['Intent Pass'] == 'βœ…' and r['Response Pass'] == 'βœ…'):
                    everything_ok = False
            return rows, f"πŸ§ͺ Tests complete! All passed: {everything_ok}"

        chat_io = [message_box, chat_window]
        chat_out = [chat_window, message_box]
        message_box.submit(submit_message, chat_io, chat_out)
        send_button.click(submit_message, chat_io, chat_out)
        clear_button.click(clear_chat, None, [chat_window, message_box])
        tests_button.click(run_tests, None, [chat_window, status_box])

        gr.Examples(
            examples=sample_prompts,
            inputs=message_box,
            label="Test These (Including Your Failing Ones)"
        )
    return demo
# --- Main Execution ---
def main():
    """Command-line entry point.

    With no argument the Gradio app is launched; ``--train`` fine-tunes the
    model, ``--test`` runs the built-in tests, and any other argument prints
    the usage line and exits with status 1.
    """
    cli_args = sys.argv[1:]
    if not cli_args:
        demo = create_gradio_interface()
        demo.launch(share=True, server_port=7860, show_error=True)
        return

    command = cli_args[0]
    if command == "--train":
        print("πŸ”„ Training...")
        train_model()
    elif command == "--test":
        print("πŸ§ͺ Testing...")
        test_chatbot()
    else:
        print("Usage: python app.py [--train | --test]")
        sys.exit(1)


if __name__ == "__main__":
    main()