# import sys # import re # import json # import os # from datasets import Dataset # from transformers import ( # DistilBertTokenizerFast, # DistilBertForSequenceClassification, # Trainer, # TrainingArguments, # pipeline # ) # import gradio as gr # # --- Training Section --- # def train_model(): # # Create small custom dataset # data = { # "train": [ # # Greet (label 0) # {"text": "hi", "label": 0}, {"text": "hello", "label": 0}, {"text": "hey there", "label": 0}, # {"text": "good morning", "label": 0}, {"text": "greetings", "label": 0}, {"text": "hi bot", "label": 0}, # {"text": "hello jusbook", "label": 0}, {"text": "hey", "label": 0}, {"text": "start chat", "label": 0}, # {"text": "yo", "label": 0}, # # Services (label 1) # {"text": "what services do you offer", "label": 1}, {"text": "list services", "label": 1}, # {"text": "what can I book", "label": 1}, {"text": "services available", "label": 1}, # {"text": "tell me about your services", "label": 1}, {"text": "what do you provide", "label": 1}, # {"text": "service list", "label": 1}, {"text": "offerings", "label": 1}, # {"text": "jusbook services", "label": 1}, {"text": "what's offered", "label": 1}, # # Contact (label 2) # {"text": "contact info", "label": 2}, {"text": "how to reach you", "label": 2}, # {"text": "support contact", "label": 2}, {"text": "email address", "label": 2}, # {"text": "phone number", "label": 2}, {"text": "address", "label": 2}, # {"text": "get in touch", "label": 2}, {"text": "contact details", "label": 2}, # {"text": "support info", "label": 2}, {"text": "how to contact", "label": 2}, # # Available slots (label 3) # {"text": "available slots", "label": 3}, {"text": "booking times", "label": 3}, # {"text": "what slots are free", "label": 3}, {"text": "show available bookings", "label": 3}, # {"text": "free slots", "label": 3}, {"text": "appointment times", "label": 3}, # {"text": "available appointments", "label": 3}, {"text": "slots for booking", "label": 3}, # {"text": "when can I 
book", "label": 3}, {"text": "open slots", "label": 3}, # # Book slot (label 4) # {"text": "book a slot", "label": 4}, {"text": "reserve monday 10am", "label": 4}, # {"text": "book tuesday", "label": 4}, {"text": "schedule appointment", "label": 4}, # {"text": "reserve a time", "label": 4}, {"text": "book now", "label": 4}, # {"text": "make a booking", "label": 4}, {"text": "slot reservation", "label": 4}, # {"text": "book wednesday 9am", "label": 4}, {"text": "reserve slot", "label": 4}, # # Broadcast (label 5) # {"text": "broadcast slots", "label": 5}, {"text": "announce available slots", "label": 5}, # {"text": "slot broadcast", "label": 5}, {"text": "share slots", "label": 5}, # {"text": "announce bookings", "label": 5}, {"text": "broadcast info", "label": 5}, # {"text": "publicize slots", "label": 5}, {"text": "slot announcement", "label": 5}, # {"text": "tell everyone about slots", "label": 5}, {"text": "broadcast", "label": 5}, # # Upcoming (label 6) # {"text": "upcoming events", "label": 6}, {"text": "future bookings", "label": 6}, # {"text": "what's coming up", "label": 6}, {"text": "upcoming appointments", "label": 6}, # {"text": "events soon", "label": 6}, {"text": "next events", "label": 6}, # {"text": "upcoming info", "label": 6}, {"text": "future events", "label": 6}, # {"text": "what bookings are upcoming", "label": 6}, {"text": "events list", "label": 6}, # ] # } # dataset = Dataset.from_dict({ # "text": [item["text"] for item in data["train"]], # "label": [item["label"] for item in data["train"]] # }) # tokenizer = DistilBertTokenizerFast.from_pretrained("distilbert-base-uncased") # model = DistilBertForSequenceClassification.from_pretrained("distilbert-base-uncased", num_labels=7) # def tokenize(batch): # return tokenizer(batch["text"], padding=True, truncation=True, return_tensors="pt") # dataset = dataset.map(tokenize, batched=True) # dataset.set_format("torch", columns=["input_ids", "attention_mask", "label"]) # training_args = 
TrainingArguments( # output_dir="./results", # num_train_epochs=3, # per_device_train_batch_size=8, # logging_steps=10, # save_steps=50, # evaluation_strategy="no", # learning_rate=5e-5, # ) # trainer = Trainer(model=model, args=training_args, train_dataset=dataset) # trainer.train() # model.save_pretrained("./trained_model") # tokenizer.save_pretrained("./trained_model") # # --- Chatbot Class --- # class Chatbot: # def __init__(self): # model_path = os.path.abspath("trained_model") # self.model = DistilBertForSequenceClassification.from_pretrained(model_path) # self.tokenizer = DistilBertTokenizerFast.from_pretrained(model_path) # self.classifier = pipeline("text-classification", model=self.model, tokenizer=self.tokenizer) # self.intent_map = { # "LABEL_0": "greet", # "LABEL_1": "services", # "LABEL_2": "contact", # "LABEL_3": "available_slots", # "LABEL_4": "book_slot", # "LABEL_5": "broadcast", # "LABEL_6": "upcoming" # } # self.services = ["Legal Consultation", "Document Review", "Appointment Booking", "Virtual Meetings"] # self.contact_info = {"Email": "support@jusbook.com", "Phone": "+1-123-456-7890", "Address": "123 Justice Lane, Law City, USA"} # self.available_slots = ["Monday 10AM-12PM", "Tuesday 2PM-4PM", "Wednesday 9AM-11AM"] # self.upcoming_events = ["Webinar on Legal Rights: Sept 20, 2025", "Free Consultation Day: Oct 1, 2025"] # self.booked_slots = {} # def preprocess(self, text): # return text.lower().strip() # def get_response(self, user_input): # processed = self.preprocess(user_input) # prediction = self.classifier(processed)[0] # intent = self.intent_map.get(prediction["label"], "unknown") # if intent == "greet": # return "Hello! Welcome to jusbook.com. How can I help you today?" 
# elif intent == "services": # return "We offer:\n" + "\n".join(f"- {s}" for s in self.services) # elif intent == "contact": # return "Contact:\n" + "\n".join(f"- {k}: {v}" for k, v in self.contact_info.items()) # elif intent == "available_slots": # return "Available slots:\n" + "\n".join(f"- {s}" for s in self.available_slots) # elif intent == "book_slot": # match = re.search(r'(monday|tuesday|wednesday)\s+(\d+am|\d+pm)-(\d+am|\d+pm)', processed) # if match: # slot = f"{match.group(1).capitalize()} {match.group(2).upper()}-{match.group(3).upper()}" # if slot in self.available_slots: # self.available_slots.remove(slot) # self.booked_slots["user"] = slot # return f"Booked: {slot}. Confirmed!" # return "Slot not available." # return "Specify slot, e.g., 'book Monday 10AM-12PM'." # elif intent == "broadcast": # return "Broadcast: Available slots:\n" + "\n".join(f"- {s}" for s in self.available_slots) # elif intent == "upcoming": # return "Upcoming:\n" + "\n".join(f"- {e}" for e in self.upcoming_events) # else: # return "Sorry, didn't understand. Ask about services, slots, etc." 
# # --- Gradio App --- # def run_gradio(): # bot = Chatbot() # with gr.Blocks() as demo: # gr.Markdown("# ๐Ÿค– JusBook Chatbot") # chatbot_ui = gr.Chatbot() # msg = gr.Textbox(placeholder="Type your message here...") # clear = gr.Button("Clear Chat") # def respond(message, chat_history): # response = bot.get_response(message) # chat_history.append((message, response)) # return "", chat_history # msg.submit(respond, [msg, chatbot_ui], [msg, chatbot_ui]) # clear.click(lambda: None, None, chatbot_ui, queue=False) # demo.launch() # # --- Main Execution --- # if __name__ == "__main__": # if len(sys.argv) > 1 and sys.argv[1] == "--train": # train_model() # else: # run_gradio() # import sys # import re # import json # import os # from datasets import Dataset # from transformers import ( # DistilBertTokenizerFast, # DistilBertForSequenceClassification, # pipeline # ) # import gradio as gr # import torch # Add this import # # --- Chatbot Class (No Training Here) --- # class Chatbot: # def __init__(self): # # Load from Hub: Replace 'your-username/fine-tuned-jusbook-model' with your pushed model # # For now, use a base model; fine-tune and push for custom intents # model_name = "distilbert-base-uncased" # Or your fine-tuned repo # self.device = torch.device('cuda' if torch.cuda.is_available() else 'cpu') # self.model = DistilBertForSequenceClassification.from_pretrained( # model_name, num_labels=7 # ).to(self.device) # self.tokenizer = DistilBertTokenizerFast.from_pretrained(model_name) # self.model.eval() # Inference mode # self.classifier = pipeline( # "text-classification", # model=self.model, # tokenizer=self.tokenizer, # device=0 if self.device.type == 'cuda' else -1 # -1 for CPU # ) # self.intent_map = { # "LABEL_0": "greet", # "LABEL_1": "services", # "LABEL_2": "contact", # "LABEL_3": "available_slots", # "LABEL_4": "book_slot", # "LABEL_5": "broadcast", # "LABEL_6": "upcoming" # } # self.services = ["Legal Consultation", "Document Review", "Appointment Booking", 
"Virtual Meetings"] # self.contact_info = {"Email": "support@jusbook.com", "Phone": "+1-123-456-7890", "Address": "123 Justice Lane, Law City, USA"} # self.available_slots = ["Monday 10AM-12PM", "Tuesday 2PM-4PM", "Wednesday 9AM-11AM"] # self.upcoming_events = ["Webinar on Legal Rights: Sept 20, 2025", "Free Consultation Day: Oct 1, 2025"] # self.booked_slots = {} # def preprocess(self, text): # return text.lower().strip() # def get_response(self, user_input): # try: # processed = self.preprocess(user_input) # prediction = self.classifier(processed)[0] # intent = self.intent_map.get(prediction["label"], "unknown") # if intent == "greet": # return "Hello! Welcome to jusbook.com. How can I help you today?" # elif intent == "services": # return "We offer:\n" + "\n".join(f"- {s}" for s in self.services) # elif intent == "contact": # return "Contact:\n" + "\n".join(f"- {k}: {v}" for k, v in self.contact_info.items()) # elif intent == "available_slots": # return "Available slots:\n" + "\n".join(f"- {s}" for s in self.available_slots) # elif intent == "book_slot": # match = re.search(r'(monday|tuesday|wednesday)\s+(\d+am|\d+pm)-(\d+am|\d+pm)', processed) # if match: # slot = f"{match.group(1).capitalize()} {match.group(2).upper()}-{match.group(3).upper()}" # if slot in self.available_slots: # self.available_slots.remove(slot) # self.booked_slots["user"] = slot # return f"Booked: {slot}. Confirmed!" # return "Slot not available." # return "Specify slot, e.g., 'book Monday 10AM-12PM'." # elif intent == "broadcast": # return "Broadcast: Available slots:\n" + "\n".join(f"- {s}" for s in self.available_slots) # elif intent == "upcoming": # return "Upcoming:\n" + "\n".join(f"- {e}" for e in self.upcoming_events) # else: # return "Sorry, didn't understand. Ask about services, slots, etc." # except Exception as e: # print(f"Error in get_response: {e}") # Logs to Space console # return "Sorry, something went wrong. Try rephrasing!" 
# # --- Gradio App --- # def run_gradio(): # bot = Chatbot() # Pre-load here # with gr.Blocks() as demo: # gr.Markdown("# ๐Ÿค– JusBook Chatbot") # chatbot_ui = gr.Chatbot() # msg = gr.Textbox(placeholder="Type your message here...") # clear = gr.Button("Clear Chat") # def respond(message, chat_history): # response = bot.get_response(message) # chat_history.append((message, response)) # return "", chat_history # msg.submit(respond, [msg, chatbot_ui], [msg, chatbot_ui]) # clear.click(lambda: None, None, chatbot_ui, queue=False) # demo.queue() # Enable queue for Spaces # if __name__ == "__main__": # demo.launch() # # --- Main Execution --- # if __name__ == "__main__": # run_gradio() # import sys # import re # import os # import torch # import gradio as gr # from datasets import Dataset # from transformers import ( # DistilBertTokenizerFast, # DistilBertForSequenceClassification, # Trainer, # TrainingArguments, # pipeline # ) # # --- Training Data --- # def get_dataset(): # data = { # "train": [ # # Greet (label 0) # {"text": "hi", "label": 0}, {"text": "hello", "label": 0}, {"text": "hey there", "label": 0}, # {"text": "good morning", "label": 0}, {"text": "greetings", "label": 0}, {"text": "hi bot", "label": 0}, # {"text": "hello jusbook", "label": 0}, {"text": "hey", "label": 0}, {"text": "start chat", "label": 0}, # {"text": "yo", "label": 0}, {"text": "helo", "label": 0}, {"text": "hii", "label": 0}, # # Services (label 1) # {"text": "what services do you offer", "label": 1}, {"text": "list services", "label": 1}, # {"text": "what can I book", "label": 1}, {"text": "services available", "label": 1}, # {"text": "tell me about your services", "label": 1}, {"text": "what do you provide", "label": 1}, # {"text": "service list", "label": 1}, {"text": "offerings", "label": 1}, # {"text": "jusbook services", "label": 1}, {"text": "what's offered", "label": 1}, # {"text": "srvices", "label": 1}, {"text": "what u offer", "label": 1}, # # Contact (label 2) # {"text": 
"contact info", "label": 2}, {"text": "how to reach you", "label": 2}, # {"text": "support contact", "label": 2}, {"text": "email address", "label": 2}, # {"text": "phone number", "label": 2}, {"text": "address", "label": 2}, # {"text": "get in touch", "label": 2}, {"text": "contact details", "label": 2}, # {"text": "support info", "label": 2}, {"text": "how to contact", "label": 2}, # {"text": "cntact", "label": 2}, {"text": "email?", "label": 2}, # # Available slots (label 3) # {"text": "available slots", "label": 3}, {"text": "booking times", "label": 3}, # {"text": "what slots are free", "label": 3}, {"text": "show available bookings", "label": 3}, # {"text": "free slots", "label": 3}, {"text": "appointment times", "label": 3}, # {"text": "available appointments", "label": 3}, {"text": "slots for booking", "label": 3}, # {"text": "when can I book", "label": 3}, {"text": "open slots", "label": 3}, # {"text": "free slot?", "label": 3}, {"text": "slots plz", "label": 3}, # # Book slot (label 4) # {"text": "book a slot", "label": 4}, {"text": "reserve monday 10am-12pm", "label": 4}, # {"text": "book tuesday", "label": 4}, {"text": "schedule appointment", "label": 4}, # {"text": "reserve a time", "label": 4}, {"text": "book now", "label": 4}, # {"text": "make a booking", "label": 4}, {"text": "slot reservation", "label": 4}, # {"text": "book wednesday 9am-11am", "label": 4}, {"text": "reserve slot", "label": 4}, # {"text": "book mon 10am", "label": 4}, {"text": "reserve tues 2pm", "label": 4}, # # Broadcast (label 5) # {"text": "broadcast slots", "label": 5}, {"text": "announce available slots", "label": 5}, # {"text": "slot broadcast", "label": 5}, {"text": "share slots", "label": 5}, # {"text": "announce bookings", "label": 5}, {"text": "broadcast info", "label": 5}, # {"text": "publicize slots", "label": 5}, {"text": "slot announcement", "label": 5}, # {"text": "tell everyone about slots", "label": 5}, {"text": "broadcast", "label": 5}, # {"text": "announce 
slot", "label": 5}, {"text": "share slot plz", "label": 5}, # # Upcoming (label 6) # {"text": "upcoming events", "label": 6}, {"text": "future bookings", "label": 6}, # {"text": "what's coming up", "label": 6}, {"text": "upcoming appointments", "label": 6}, # {"text": "events soon", "label": 6}, {"text": "next events", "label": 6}, # {"text": "upcoming info", "label": 6}, {"text": "future events", "label": 6}, # {"text": "what bookings are upcoming", "label": 6}, {"text": "events list", "label": 6}, # {"text": "whats next", "label": 6}, {"text": "upcoming plz", "label": 6} # ] # } # return Dataset.from_dict({ # "text": [item["text"] for item in data["train"]], # "label": [item["label"] for item in data["train"]] # }) # # --- Model Training --- # def train_model(): # dataset = get_dataset() # tokenizer = DistilBertTokenizerFast.from_pretrained("distilbert-base-uncased") # model = DistilBertForSequenceClassification.from_pretrained("distilbert-base-uncased", num_labels=7) # def tokenize(batch): # return tokenizer(batch["text"], padding=True, truncation=True, return_tensors="pt") # dataset = dataset.map(tokenize, batched=True) # dataset.set_format("torch", columns=["input_ids", "attention_mask", "label"]) # training_args = TrainingArguments( # output_dir="./results", # num_train_epochs=3, # per_device_train_batch_size=8, # logging_steps=10, # save_steps=50, # evaluation_strategy="no", # learning_rate=5e-5, # ) # trainer = Trainer(model=model, args=training_args, train_dataset=dataset) # trainer.train() # model.save_pretrained("./trained_model") # tokenizer.save_pretrained("./trained_model") # print("Model trained and saved to ./trained_model") # return "โœ… Model training completed! 
Saved to ./trained_model" # # --- Model Loading --- # def load_model(): # model_path = "./trained_model" # Use fine-tuned model if available # if not os.path.exists(model_path): # model_path = "distilbert-base-uncased" # Fallback to base model # print(f"โš ๏ธ Using base model (no trained model found at {model_path})") # device = torch.device('cuda' if torch.cuda.is_available() else 'cpu') # model = DistilBertForSequenceClassification.from_pretrained(model_path, num_labels=7).to(device) # tokenizer = DistilBertTokenizerFast.from_pretrained(model_path) # model.eval() # classifier = pipeline( # "text-classification", # model=model, # tokenizer=tokenizer, # device=0 if device.type == 'cuda' else -1 # ) # return classifier, device # # --- Rule-Based Fallback --- # def rule_based_classify(text): # text_lower = text.lower() # if any(word in text_lower for word in ["hi", "hello", "hey", "greet", "helo", "hii"]): # return 0 # greet # elif any(word in text_lower for word in ["service", "offer", "book", "list", "srvices"]): # return 1 # services # elif any(word in text_lower for word in ["contact", "email", "phone", "address", "cntact"]): # return 2 # contact # elif any(word in text_lower for word in ["slot", "available", "time", "appointment"]): # return 3 # available_slots # elif "book" in text_lower or "reserve" in text_lower: # return 4 # book_slot # elif any(word in text_lower for word in ["broadcast", "announce", "share"]): # return 5 # broadcast # elif any(word in text_lower for word in ["upcoming", "event", "future", "next"]): # return 6 # upcoming # return -1 # unknown # # --- Intent Classification --- # def classify_intent(text): # try: # classifier, _ = load_model() # with torch.no_grad(): # prediction = classifier(text)[0] # label = int(prediction["label"].split("_")[1]) # return label # except Exception as e: # print(f"Model error: {e}, using rule-based fallback") # return rule_based_classify(text) # Fallback # # --- Response Logic --- # class ChatbotResponses: 
# def __init__(self): # self.intent_map = { # 0: "greet", 1: "services", 2: "contact", 3: "available_slots", # 4: "book_slot", 5: "broadcast", 6: "upcoming" # } # self.services = ["Legal Consultation", "Document Review", "Appointment Booking", "Virtual Meetings"] # self.contact_info = {"Email": "support@jusbook.com", "Phone": "+1-123-456-7890", "Address": "123 Justice Lane, Law City, USA"} # self.available_slots = ["Monday 10AM-12PM", "Tuesday 2PM-4PM", "Wednesday 9AM-11AM"] # self.upcoming_events = ["Webinar on Legal Rights: Sept 20, 2025", "Free Consultation Day: Oct 1, 2025"] # self.booked_slots = {} # def get_response(self, user_input, intent_label): # intent = self.intent_map.get(intent_label, "unknown") # processed = user_input.lower().strip() # if intent == "greet": # return "Hello! Welcome to jusbook.com. How can I help you today?" # elif intent == "services": # return "We offer:\n" + "\n".join(f"- {s}" for s in self.services) # elif intent == "contact": # return "Contact:\n" + "\n".join(f"- {k}: {v}" for k, v in self.contact_info.items()) # elif intent == "available_slots": # return "Available slots:\n" + "\n".join(f"- {s}" for s in self.available_slots) # elif intent == "broadcast": # return "๐Ÿ”Š **Broadcast Message**: Available slots:\n" + "\n".join(f"- {s}" for s in self.available_slots) # elif intent == "upcoming": # return "Upcoming Events:\n" + "\n".join(f"- {e}" for e in self.upcoming_events) # elif intent == "book_slot": # pattern = r'(monday|tuesday|wednesday)\s+(\d+am?|\d+pm?)-(\d+am?|\d+pm?)' # match = re.search(pattern, processed, re.IGNORECASE) # if match: # day = match.group(1).capitalize() # start = match.group(2).upper() # end = match.group(3).upper() # slot = f"{day} {start}-{end}" # if slot in self.available_slots: # self.available_slots.remove(slot) # self.booked_slots["user"] = slot # return f"โœ… **Booking Confirmed!**\n\n**Slot**: {slot}\n**Status**: Reserved for you!" 
# return f"โŒ **Slot Unavailable**\n\nThe requested slot '{slot}' is no longer available." # # Show available slots and prompt for specific booking # return "Available slots:\n" + "\n".join(f"- {s}" for s in self.available_slots) + "\n\n**To book**: Type 'book [day] [time]', e.g., 'book Monday 10AM-12PM'" # else: # return "Sorry, I didn't understand that. You can ask me about:\n- Services\n- Contact info\n- Available slots\n- Booking a slot\n- Upcoming events\n\nTry something like 'hello', 'services', or 'book Monday 10AM-12PM'" # # Global chatbot instance # bot = ChatbotResponses() # # --- Testing Function --- # def test_chatbot(): # """Run comprehensive tests for the chatbot""" # test_cases = [ # ("hello", 0, "Hello! Welcome to jusbook.com"), # ("what services", 1, "We offer:\n- Legal Consultation"), # ("contact info", 2, "Contact:\n- Email: support@jusbook.com"), # ("available slots", 3, "Available slots:\n- Monday 10AM-12PM"), # ("book monday 10am-12pm", 4, "Booking Confirmed"), # ("broadcast", 5, "Broadcast Message"), # ("upcoming events", 6, "Upcoming Events"), # ("random nonsense", -1, "Sorry, I didn't understand") # ] # results = [] # for input_text, expected_intent, expected_response_contains in test_cases: # try: # intent = classify_intent(input_text) # response = bot.get_response(input_text, intent) # intent_pass = "โœ…" if intent == expected_intent else "โŒ" # response_pass = "โœ…" if expected_response_contains in response else "โŒ" # results.append({ # "Input": input_text, # "Expected Intent": expected_intent, # "Got Intent": intent, # "Intent Pass": intent_pass, # "Response": response[:100] + "..." 
if len(response) > 100 else response, # "Response Pass": response_pass # }) # except Exception as e: # results.append({ # "Input": input_text, # "Error": str(e), # "Status": "โŒ Failed" # }) # # Print results # print("\n" + "="*80) # print("๐Ÿงช CHATBOT TEST RESULTS") # print("="*80) # all_passed = True # for result in results: # if "Error" in result: # print(f"โŒ {result['Input']}: {result['Error']}") # all_passed = False # else: # intent_status = result["Intent Pass"] # response_status = result["Response Pass"] # status = "โœ…" if intent_status == "โœ…" and response_status == "โœ…" else "โš ๏ธ" # print(f"{status} '{result['Input']}' โ†’ Intent: {result['Got Intent']} ({intent_status}) | Response: {response_status}") # if intent_status != "โœ…" or response_status != "โœ…": # all_passed = False # if all_passed: # print("\n๐ŸŽ‰ All tests passed! The chatbot is ready to use.") # else: # print("\nโš ๏ธ Some tests failed. Check the results above.") return results # --- Gradio Chat Function --- # def chat_response(message, history): # """Generate chatbot response for Gradio interface""" # if not message.strip(): # return history, "" # try: # # Classify intent # intent = classify_intent(message) # # Generate response # response = bot.get_response(message, intent) # # Add to history # history.append([message, response]) # return history, "" # except Exception as e: # error_msg = f"Sorry, something went wrong: {str(e)}" # history.append([message, error_msg]) # return history, "" # # --- Gradio Interface --- # def create_gradio_interface(): # """Create the main Gradio interface""" # with gr.Blocks(title="JusBook Chatbot", theme=gr.themes.Soft()) as demo: # gr.Markdown( # """ # # ๐Ÿค– **JusBook Chatbot** # *Your legal appointment assistant* # Ask me about: # - Available services # - Contact information # - Booking slots # - Upcoming events # **Try:** "hello", "services", "available slots", or "book Monday 10AM-12PM" # """ # ) # # Chat interface # chatbot = gr.Chatbot( # 
height=500, # show_label=False, # avatar_images=("user-avatar.png", "bot-avatar.png") # ) # msg = gr.Textbox( # placeholder="Type your message here...", # container=True, # label="Your Message", # scale=7 # ) # with gr.Row(): # submit_btn = gr.Button("Send", variant="primary", scale=1) # clear_btn = gr.Button("Clear Chat", scale=1) # test_btn = gr.Button("Run Tests", scale=1) # Status messages # status = gr.Textbox(label="Status", interactive=False) # # Event handlers # def submit_message(message, history): # return chat_response(message, history) # def clear_chat(): # return [], "" # def run_tests(): # results = test_chatbot() # # Convert results to chat format for display # test_history = [] # for result in results[:5]: # Show first 5 tests # if "Error" not in result: # test_msg = f"**Test:** {result['Input']}\n**Intent:** {result['Got Intent']} ({result['Intent Pass']})\n**Response:** {result['Response'][:50]}..." # test_history.append([test_msg, ""]) # return test_history, "๐Ÿงช Test results displayed above. Check console for full output." 
# msg.submit(submit_message, [msg, chatbot], [chatbot, msg]) # submit_btn.click(submit_message, [msg, chatbot], [chatbot, msg]) # clear_btn.click(clear_chat, None, [chatbot, msg]) # test_btn.click(run_tests, None, [chatbot, status]) # # Add examples # gr.Examples( # examples=[ # ["hello"], # ["what services do you offer?"], # ["show me available slots"], # ["book Monday 10AM-12PM"], # ["contact information"], # ["upcoming events"], # ["broadcast available slots"] # ], # inputs=msg, # label="Quick Examples" # ) # return demo # # --- Training Interface --- # def create_training_interface(): # """Create Gradio interface for model training""" # with gr.Blocks(title="Model Training") as demo: # gr.Markdown("# ๐Ÿ”„ **Model Training Dashboard**") # gr.Markdown("Train your custom DistilBERT model for the JusBook chatbot") # with gr.Row(): # train_btn = gr.Button("๐Ÿš€ Start Training", variant="primary", size="lg") # status = gr.Textbox(label="Training Status", interactive=False, lines=3) # # Training output # training_log = gr.Textbox(label="Training Log", lines=10, interactive=False) # def run_training(): # try: # log_content = [] # def log_callback(msg): # log_content.append(msg) # return "\n".join(log_content) # print("Starting model training...") # status_msg = train_model() # log_content.append(status_msg) # training_log.update(value="\n".join(log_content)) # status.update(value="โœ… Training completed successfully!") # return log_content, "โœ… Training completed successfully!" 
# except Exception as e: # error_msg = f"โŒ Training failed: {str(e)}" # status.update(value=error_msg) # return [error_msg], error_msg # train_btn.click( # run_training, # outputs=[training_log, status] # ) # gr.Markdown("### ๐Ÿ“Š Dataset Info") # gr.Markdown(f""" # **Training Examples:** {len(get_dataset())} # **Intents:** 7 (Greet, Services, Contact, Slots, Booking, Broadcast, Upcoming) # **Model:** DistilBERT-base (~66M parameters) # **Epochs:** 3 # """) # return demo # # --- Main Execution --- # def main(): # if len(sys.argv) > 1: # if sys.argv[1] == "--train": # print("๐Ÿ”„ Starting model training...") # train_model() # elif sys.argv[1] == "--test": # print("๐Ÿงช Running chatbot tests...") # test_chatbot() # else: # print("โ“ Unknown command. Use --train or --test") # sys.exit(1) # else: # # Launch Gradio interface # demo = create_gradio_interface() # demo.launch( # share=True, # For public sharing # server_name="0.0.0.0", # server_port=7860, # show_error=True # ) # if __name__ == "__main__": # main()
import logging
import os
import re
import sys
from functools import lru_cache

import gradio as gr
import torch
from datasets import Dataset
from transformers import (
    DistilBertTokenizerFast,
    DistilBertForSequenceClassification,
    Trainer,
    TrainingArguments,
    pipeline
)

# Diagnostic traces go through logging; enable them with
# logging.basicConfig(level=logging.DEBUG).
logger = logging.getLogger(__name__)

BASE_MODEL_NAME = "distilbert-base-uncased"
TRAINED_MODEL_DIR = "./trained_model"
NUM_LABELS = 7
UNKNOWN_INTENT = -1


# --- Training Data ---
# Example utterances keyed by intent label. Insertion order (label 0..6, then
# the order inside each tuple) is the row order of the resulting dataset.
_TRAINING_EXAMPLES = {
    # Greet (label 0)
    0: (
        "hi", "hello", "hey there", "good morning", "greetings", "hi bot",
        "hello jusbook", "hey", "start chat", "yo", "helo", "hii",
    ),
    # Services (label 1)
    1: (
        "what services do you offer", "list services", "what can I book",
        "services available", "tell me about your services",
        "what do you provide", "service list", "offerings",
        "jusbook services", "what's offered", "srvices", "what u offer",
    ),
    # Contact (label 2)
    2: (
        "contact info", "how to reach you", "support contact",
        "email address", "phone number", "address", "get in touch",
        "contact details", "support info", "how to contact", "cntact",
        "email?",
    ),
    # Available slots (label 3)
    3: (
        "available slots", "booking times", "what slots are free",
        "show available bookings", "free slots", "appointment times",
        "available appointments", "slots for booking", "when can I book",
        "open slots", "free slot?", "slots plz",
    ),
    # Book slot (label 4)
    4: (
        "book a slot", "reserve monday 10am-12pm", "book tuesday",
        "schedule appointment", "reserve a time", "book now",
        "make a booking", "slot reservation", "book wednesday 9am-11am",
        "reserve slot", "book mon 10am", "reserve tues 2pm",
    ),
    # Broadcast (label 5)
    5: (
        "broadcast slots", "announce available slots", "slot broadcast",
        "share slots", "announce bookings", "broadcast info",
        "publicize slots", "slot announcement", "tell everyone about slots",
        "broadcast", "announce slot", "share slot plz",
    ),
    # Upcoming (label 6)
    6: (
        "upcoming events", "future bookings", "what's coming up",
        "upcoming appointments", "events soon", "next events",
        "upcoming info", "future events", "what bookings are upcoming",
        "events list", "whats next", "upcoming plz",
    ),
}


def get_dataset():
    """Build the intent-classification training set.

    Returns:
        A ``datasets.Dataset`` with a ``text`` column (the utterance) and a
        ``label`` column (integer intent id 0-6), 12 examples per intent.
    """
    texts = []
    labels = []
    for label, examples in _TRAINING_EXAMPLES.items():
        texts.extend(examples)
        labels.extend([label] * len(examples))
    return Dataset.from_dict({"text": texts, "label": labels})


# --- Model Training ---
def train_model():
    """Fine-tune DistilBERT on the intent dataset and save it to disk.

    The model and tokenizer are written to ``./trained_model``; Trainer
    checkpoints and logs go to ``./results``.

    Returns:
        A short status message suitable for showing to the user.
    """
    dataset = get_dataset()
    tokenizer = DistilBertTokenizerFast.from_pretrained(BASE_MODEL_NAME)
    model = DistilBertForSequenceClassification.from_pretrained(
        BASE_MODEL_NAME, num_labels=NUM_LABELS
    )

    def tokenize(batch):
        # No return_tensors here: map() stores plain lists and set_format()
        # below is what converts the columns to torch tensors.
        return tokenizer(batch["text"], padding=True, truncation=True)

    # batch_size=None hands the whole dataset to tokenize() as one batch, so
    # padding=True pads every example to the same length.
    dataset = dataset.map(tokenize, batched=True, batch_size=None)
    dataset.set_format("torch", columns=["input_ids", "attention_mask", "label"])

    # The evaluation strategy is left at its default ("no"). Passing it
    # explicitly as evaluation_strategy= breaks on transformers releases that
    # renamed the keyword to eval_strategy.
    training_args = TrainingArguments(
        output_dir="./results",
        num_train_epochs=3,
        per_device_train_batch_size=8,
        logging_steps=10,
        save_steps=50,
        learning_rate=5e-5,
    )
    trainer = Trainer(model=model, args=training_args, train_dataset=dataset)
    trainer.train()

    model.save_pretrained(TRAINED_MODEL_DIR)
    tokenizer.save_pretrained(TRAINED_MODEL_DIR)
    # Drop any classifier cached before training so the next prediction in
    # this process loads the freshly saved weights.
    _get_classifier.cache_clear()
    print("Model trained and saved to ./trained_model")
    return "✅ Model training completed! Saved to ./trained_model"


# --- Model Loading ---
def load_model():
    """Load a text-classification pipeline for intent prediction.

    Uses the fine-tuned model in ``./trained_model`` when that directory
    exists; otherwise falls back to the base DistilBERT checkpoint, whose
    classification head is untrained.

    Returns:
        A ``(classifier, device)`` tuple: the transformers pipeline and the
        ``torch.device`` the model was moved to.
    """
    model_path = TRAINED_MODEL_DIR
    if not os.path.isdir(model_path):
        model_path = BASE_MODEL_NAME
        logger.warning("⚠️ Using base model (train first for better accuracy!)")

    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
    model = DistilBertForSequenceClassification.from_pretrained(
        model_path, num_labels=NUM_LABELS
    ).to(device)
    tokenizer = DistilBertTokenizerFast.from_pretrained(model_path)
    model.eval()
    classifier = pipeline(
        "text-classification",
        model=model,
        tokenizer=tokenizer,
        device=0 if device.type == "cuda" else -1,
    )
    return classifier, device


@lru_cache(maxsize=1)
def _get_classifier():
    """Return the classifier, loading it only once per process."""
    classifier, _ = load_model()
    return classifier


# --- Rule-Based Classification ---
# One row per intent, checked top to bottom; the first matching row wins.
# "stems" match at the start of a word ("service" matches "services", "book"
# matches "booking") but not in the middle of one ("book" no longer matches
# "facebook"). "whole words" must match a complete word, so "hi" no longer
# fires on "this", "which" or "hire".
# "telephone" and "reschedule" are listed explicitly because the previous
# substring matching caught them via "phone" and "schedule".
# NOTE(review): "booked slots", "my booking" and "slots booked" are already
# covered by the "book" stem, and "show my bookings" (row 6) can never be
# reached because row 4 matches "book" first. Likewise "broadcast available
# slots" resolves to row 3. Row order is kept as-is; confirm intended priority.
_INTENT_RULES = (
    # (label, name, stems, whole words)
    (0, "greet", ("hello", "greet", "helo", "good morning"), ("hi", "hii", "hey")),
    (1, "services", ("service", "offer", "list", "srvices", "what can i"), ()),
    (2, "contact",
     ("contact", "email", "phone", "telephone", "address", "cntact", "support"), ()),
    (3, "available_slots",
     ("available", "free slot", "open slot", "when can i book"), ()),
    (4, "book_slot",
     ("book", "reserve", "schedule", "reschedule", "booked slots", "my booking",
      "slots booked"), ()),
    (5, "broadcast", ("broadcast", "announce", "share"), ()),
    (6, "upcoming", ("upcoming", "event", "future", "next", "show my bookings"), ()),
)


def _compile_rule(stems, whole_words):
    """Compile one rule row into a regex anchored at a word start."""
    alternatives = [re.escape(stem) for stem in stems]
    alternatives += [re.escape(word) + r"\b" for word in whole_words]
    return re.compile(r"\b(?:" + "|".join(alternatives) + ")")


_COMPILED_RULES = tuple(
    (label, name, _compile_rule(stems, whole_words))
    for label, name, stems, whole_words in _INTENT_RULES
)


def rule_based_classify(text):
    """Classify *text* with keyword rules.

    Args:
        text: The raw user message. ``None`` and empty strings are accepted.

    Returns:
        The intent label (0-6) of the first rule that matches, or -1 when no
        rule matches.
    """
    if not text:
        return UNKNOWN_INTENT

    text_lower = text.lower()
    logger.debug("Rule classify input: %r", text_lower)
    for label, name, pattern in _COMPILED_RULES:
        if pattern.search(text_lower):
            logger.debug("Matched %s", name)
            return label
    logger.debug("No match, unknown")
    return UNKNOWN_INTENT


def classify_intent(text):
    """Return the intent label for a user message.

    Keyword rules are tried first because they work without any training.
    The fine-tuned model is consulted only when no rule matches and a
    trained checkpoint exists in ``./trained_model``.

    Args:
        text: The raw user message.

    Returns:
        An intent label (0-6), or -1 when the intent cannot be determined.
    """
    if not isinstance(text, str) or not text.strip():
        return UNKNOWN_INTENT

    rule_intent = rule_based_classify(text)
    if rule_intent != UNKNOWN_INTENT:
        logger.debug("Using rule-based intent: %s", rule_intent)
        return rule_intent

    # Without fine-tuning the classification head is randomly initialised, so
    # its prediction would be an arbitrary label; report "unknown" instead.
    if not os.path.isdir(TRAINED_MODEL_DIR):
        logger.debug("No fine-tuned model available, intent unknown")
        return UNKNOWN_INTENT

    try:
        classifier = _get_classifier()
        with torch.no_grad():
            prediction = classifier(text)[0]
        label_str = prediction["label"]
        # Accepts both "LABEL_3" and a bare "3".
        label = int(label_str.rsplit("_", 1)[-1])
    except Exception:
        # Boundary handler: a broken model must not take the chat down. The
        # rules already failed for this text, so the answer is "unknown".
        logger.exception("Model error while classifying intent")
        return UNKNOWN_INTENT

    logger.debug("Model predicted: %s (score: %.2f)", label_str, prediction["score"])
    return label


# NOTE(review): the line below is the still-collapsed start of ChatbotResponses,
# kept exactly as found because the definition continues on the following line;
# it needs the same re-indentation as the code above.
# --- FIXED Response Logic --- (Better booking handling, show booked slots) class ChatbotResponses: def __init__(self): self.intent_map = { 0: "greet", 1: "services", 2: "contact", 3: "available_slots", 4: "book_slot", 5: "broadcast", 6: "upcoming" } self.services = ["Legal Consultation", "Document Review", "Appointment Booking", "Virtual Meetings"] self.contact_info = {"Email": "support@jusbook.com", "Phone": "+1-123-456-7890", "Address": "123 Justice Lane, Law City, USA"} self.available_slots = ["Monday 10AM-12PM", "Tuesday 2PM-4PM", "Wednesday 9AM-11AM"] self.upcoming_events = ["Webinar on Legal Rights: Sept 20, 2025", "Free Consultation Day: Oct 1, 2025"] self.booked_slots = {} # e.g., {"user": "Monday 10AM-12PM"} def get_response(self, user_input, intent_label): intent = self.intent_map.get(intent_label, "unknown") processed = user_input.lower().strip() print(f"DEBUG: Intent: {intent}, Input: {processed}") # Debug if intent == "greet": return "Hello! Welcome to jusbook.com. How can I help you today?" 
elif intent == "services": return "We offer:\n" + "\n".join(f"- {s}" for s in self.services) elif intent == "contact": return "Contact:\n" + "\n".join(f"- {k}: {v}" for k, v in self.contact_info.items()) elif intent == "available_slots": return "Available slots:\n" + "\n".join(f"- {s}" for s in self.available_slots) elif intent == "broadcast": return "๐Ÿ”Š **Broadcast Message**: Available slots:\n" + "\n".join(f"- {s}" for s in self.available_slots) elif intent == "upcoming": booked_info = "\n".join(f"- {user}: {slot}" for user, slot in self.booked_slots.items()) if self.booked_slots else "No upcoming bookings yet." return f"Upcoming Events:\n" + "\n".join(f"- {e}" for e in self.upcoming_events) + f"\n\nYour Bookings:\n{booked_info}" elif intent == "book_slot": # Enhanced regex for booking book_pattern = r'book\s+(monday|tuesday|wednesday)\s+(\d+am?|\d+pm?)-(\d+am?|\d+pm?)' match = re.search(book_pattern, processed, re.IGNORECASE) if match: day = match.group(1).capitalize() start = match.group(2).upper() end = match.group(3).upper() slot = f"{day} {start}-{end}" if slot in self.available_slots: self.available_slots.remove(slot) self.booked_slots["user"] = slot # Simple user tracking return f"โœ… **Booking Confirmed!**\n\n**Slot**: {slot}\n**Status**: Reserved for you!" return f"โŒ **Slot Unavailable**\n\nThe requested slot '{slot}' is no longer available." # If no specific book, check if query is about booked slots if any(word in processed for word in ["booked", "my booking", "slots booked"]): if self.booked_slots: booked_info = "\n".join(f"- {user}: {slot}" for user, slot in self.booked_slots.items()) return f"Your Booked Slots:\n{booked_info}\n\nAvailable: " + "\n".join(f"- {s}" for s in self.available_slots) return "No slots booked yet. 
Available:\n" + "\n".join(f"- {s}" for s in self.available_slots) # Default prompt return "Available slots:\n" + "\n".join(f"- {s}" for s in self.available_slots) + "\n\n**To book**: Type 'book [day] [time]', e.g., 'book Monday 10AM-12PM'" else: return "Sorry, I didn't understand that. You can ask me about:\n- Services\n- Contact info\n- Available slots\n- Booking a slot (e.g., 'book Monday 10AM-12PM')\n- Upcoming events\n- Your booked slots" # Global instance bot = ChatbotResponses() # --- UPDATED Testing Function --- (Added your failing cases) def test_chatbot(): test_cases = [ ("hello", 0, "Hello! Welcome"), ("contact info", 2, "Contact:\n- Email"), ("available slots", 3, "Available slots"), ("book monday 10am-12pm", 4, "Booking Confirmed"), ("slots booked", 4, "Your Booked Slots"), # New: Should match booking intent ("booked slots", 4, "No slots booked yet"), # New ("broadcast", 5, "Broadcast Message"), ("upcoming events", 6, "Upcoming Events"), ("random text", -1, "Sorry, I didn't understand") ] results = [] for input_text, expected_intent, expected_response_contains in test_cases: intent = classify_intent(input_text) response = bot.get_response(input_text, intent) intent_pass = "โœ…" if intent == expected_intent else "โŒ" response_pass = "โœ…" if expected_response_contains in response else "โŒ" results.append({ "Input": input_text, "Expected Intent": expected_intent, "Got Intent": intent, "Intent Pass": intent_pass, "Response": response[:100] + "..." 
if len(response) > 100 else response, "Response Pass": response_pass }) print(f"Test '{input_text}': Intent {intent} ({intent_pass}), Response has '{expected_response_contains}' ({response_pass})") all_passed = all(r["Intent Pass"] == "โœ…" and r["Response Pass"] == "โœ…" for r in results) print(f"\n๐ŸŽ‰ {'All' if all_passed else 'Some failed - check above'} tests passed!") return results # --- Gradio Chat Function --- (unchanged, but with debug prints) def chat_response(message, history): if not message.strip(): return history, "" print(f"\n--- New Message: {message} ---") # Debug separator try: intent = classify_intent(message) response = bot.get_response(message, intent) history.append([message, response]) print(f"Response: {response[:50]}...") # Debug return history, "" except Exception as e: error_msg = f"Sorry, something went wrong: {str(e)}" history.append([message, error_msg]) return history, "" # --- Gradio Interface --- (Added debug console link) def create_gradio_interface(): with gr.Blocks(title="JusBook Chatbot", theme=gr.themes.Soft()) as demo: gr.Markdown("# ๐Ÿค– **JusBook Chatbot** (Debug Mode)") gr.Markdown("*Check browser console for DEBUG logs*") chatbot = gr.Chatbot(height=500) msg = gr.Textbox(placeholder="Type your message here...", label="Your Message") with gr.Row(): submit_btn = gr.Button("Send", variant="primary") clear_btn = gr.Button("Clear Chat") test_btn = gr.Button("Run Tests") status = gr.Textbox(label="Status", interactive=False) def submit_message(message, history): return chat_response(message, history) def clear_chat(): return [], "" def run_tests(): results = test_chatbot() test_history = [[r["Input"], f"Intent: {r['Got Intent']} | Pass: {r['Intent Pass'] + r['Response Pass']}"] for r in results] return test_history, f"๐Ÿงช Tests complete! 
All passed: {all(r['Intent Pass'] == 'โœ…' and r['Response Pass'] == 'โœ…' for r in results)}" msg.submit(submit_message, [msg, chatbot], [chatbot, msg]) submit_btn.click(submit_message, [msg, chatbot], [chatbot, msg]) clear_btn.click(clear_chat, None, [chatbot, msg]) test_btn.click(run_tests, None, [chatbot, status]) gr.Examples( examples=[ ["hello"], ["contact info"], ["available slots"], ["book Monday 10AM-12PM"], ["slots booked"], ["booked slots"], ["upcoming events"] ], inputs=msg, label="Test These (Including Your Failing Ones)" ) return demo # --- Main Execution --- def main(): if len(sys.argv) > 1: if sys.argv[1] == "--train": print("๐Ÿ”„ Training...") train_model() elif sys.argv[1] == "--test": print("๐Ÿงช Testing...") test_chatbot() else: print("Usage: python app.py [--train | --test]") sys.exit(1) else: demo = create_gradio_interface() demo.launch(share=True, server_port=7860, show_error=True) if __name__ == "__main__": main()