Neo committed on
Commit
cfd24c9
·
1 Parent(s): 859566f
Files changed (2) hide show
  1. app.py +69 -0
  2. requirements.txt +4 -0
app.py ADDED
@@ -0,0 +1,69 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import gradio as gr
2
+ # --- FIX 1: Added 'pipeline' and 'DataCollatorForLanguageModeling' to imports ---
3
+ from transformers import AutoTokenizer, AutoModelForCausalLM, Trainer, TrainingArguments, pipeline, DataCollatorForLanguageModeling
4
+ from datasets import load_dataset
5
+
6
+ ds = load_dataset("kaifkhaan/roast")
7
+
8
+ tokenizer = AutoTokenizer.from_pretrained("distilgpt2")
9
+ model = AutoModelForCausalLM.from_pretrained("distilgpt2")
10
+
11
+ tokenizer.pad_token = tokenizer.eos_token
12
+
13
+ # Preprocessing function
14
+ def preprocess(example):
15
+ prompt = example["User"]
16
+ response = example["Roasting Bot"]
17
+ return tokenizer(
18
+ f"{prompt} -> {response}",
19
+ truncation=True,
20
+ max_length=128, # Ensures all inputs are the same size
21
+ padding="max_length"
22
+ )
23
+
24
+ # Map the preprocessing function to the dataset
25
+ tokenized_ds = ds.map(preprocess, batched=True, remove_columns=ds['train'].column_names)
26
+
27
+ data_collator = DataCollatorForLanguageModeling(tokenizer=tokenizer, mlm=False)
28
+
29
+ # Define training arguments
30
+ training_args = TrainingArguments(
31
+ output_dir="./roastbot",
32
+ per_device_train_batch_size=8,
33
+ num_train_epochs=3,
34
+ logging_dir="./logs",
35
+ save_steps=500,
36
+ report_to="none" # Add this to disable wandb/tensorboard logging if not configured
37
+ )
38
+
39
+ # Initialize the Trainer
40
+ trainer = Trainer(
41
+ model=model,
42
+ args=training_args,
43
+ train_dataset=tokenized_ds["train"],
44
+ data_collator=data_collator
45
+ )
46
+
47
+ print("Starting training... 🏋️")
48
+ trainer.train()
49
+ print("Training complete! ✅")
50
+
51
+ roast_pipeline = pipeline(
52
+ "text-generation",
53
+ model=model,
54
+ tokenizer=tokenizer
55
+ )
56
+
57
+ def roast_me(text):
58
+ prompt = f"{text} ->"
59
+ # Generate the roast
60
+ roast = roast_pipeline(prompt, max_length=50, do_sample=True, pad_token_id=tokenizer.eos_token_id)[0]["generated_text"]
61
+ return roast.split("->")[-1].strip()
62
+
63
+ gr.Interface(
64
+ fn=roast_me,
65
+ inputs="text",
66
+ outputs="text",
67
+ title="RoastBot 3000 🔥",
68
+ description="Type something about yourself and let the bot roast you."
69
+ ).launch()
requirements.txt ADDED
@@ -0,0 +1,4 @@
 
 
 
 
 
1
+ transformers
2
+ datasets
3
+ torch
4
+ gradio