Spaces:

rishikasharma
/

Chatbot

Runtime error

App Files Files Community

rishikasharma committed on Jan 28, 2025

Commit

4d1b6b3

verified ·

1 Parent(s): 8ab9a1c

Update app.py

Browse files

Files changed (1) hide show

app.py +46 -9

app.py CHANGED Viewed

@@ -1,10 +1,51 @@
 import gradio as gr
 from huggingface_hub import InferenceClient
-"""
-For more information on `huggingface_hub` Inference API support, please check the docs: https://huggingface.co/docs/huggingface_hub/v0.22.2/en/guides/inference
-"""
-client = InferenceClient("HuggingFaceH4/zephyr-7b-beta")
 def respond(
@@ -39,10 +80,6 @@ def respond(
         response += token
         yield response
-"""
-For information on how to customize the ChatInterface, peruse the gradio docs: https://www.gradio.app/docs/chatinterface
-"""
 demo = gr.ChatInterface(
     respond,
     additional_inputs=[
@@ -61,4 +98,4 @@ demo = gr.ChatInterface(
 if __name__ == "__main__":
-    demo.launch()

 import gradio as gr
 from huggingface_hub import InferenceClient
+from datasets import load_dataset
+from transformers import AutoTokenizer, AutoModelForCausalLM
+from transformers  import Trainer, TrainingArguments
+# Hub identifier of the base checkpoint used for both the tokenizer and the model.
+model_name = "HuggingFaceH4/zephyr-7b-beta"
+# Load the tokenizer and causal-LM weights for `model_name` at import time.
+tokenizer = AutoTokenizer.from_pretrained(model_name)
+model = AutoModelForCausalLM.from_pretrained(model_name)
+# Read the local JSON file; because `split` is given, this yields only the
+# "train" split rather than a mapping of all splits.
+dataset = load_dataset("json", data_files="data.json", split = "train")
+# Tokenize the dataset
+def preprocess_function(examples):
+    """Tokenize one batch of input/output text pairs.
+
+    This function is passed to `dataset.map(..., batched=True)`, so
+    `examples` is a mapping from column name to a list of values (one entry
+    per row in the batch), not a list of per-row dicts. Iterating over it
+    yields column names, which is why the columns are read directly here.
+
+    Args:
+        examples: Batch mapping with an 'input' column and an 'output' column.
+
+    Returns:
+        The tokenizer output for the 'input' column, with the token ids of
+        the 'output' column stored under the 'labels' key.
+    """
+    inputs = list(examples['input'])
+    targets = list(examples['output'])
+    model_inputs = tokenizer(inputs, padding=True, truncation=True)
+    # NOTE(review): inputs and targets are padded independently, so 'labels'
+    # may differ in length from 'input_ids' - confirm this is what the
+    # causal-LM loss expects before training.
+    labels = tokenizer(targets, padding=True, truncation=True).input_ids
+    model_inputs['labels'] = labels
+    return model_inputs
+tokenized_datasets = dataset.map(preprocess_function, batched = True)
+# `dataset` was loaded with split="train", so `tokenized_datasets` is a single
+# Dataset with no "train"/"validation" keys. Carve out a held-out portion
+# so the per-epoch evaluation below has data to run on.
+split_datasets = tokenized_datasets.train_test_split(test_size = 0.1, seed = 42)
+# NOTE(review): newer transformers releases rename `evaluation_strategy` to
+# `eval_strategy` - confirm against the version pinned for this Space.
+training_args = TrainingArguments(
+    output_dir = "./results",
+    evaluation_strategy = "epoch",
+    learning_rate = 2e-5,
+    per_device_train_batch_size = 3,
+    weight_decay = 0.01,
+)
+trainer = Trainer(
+    model = model,
+    args = training_args,
+    train_dataset = split_datasets["train"],
+    eval_dataset = split_datasets["test"],
+)
+#  Start fine-tuning
+# NOTE(review): this runs a full fine-tune at import time, before the UI is
+# launched - confirm the hosting hardware can hold and train this model.
+trainer.train()
+trainer.evaluate()
+# Persist the fine-tuned weights and the tokenizer side by side.
+model.save_pretrained("./fine_tuned_model")
+tokenizer.save_pretrained("./fine_tuned_model")
+# NOTE(review): InferenceClient is documented as taking a Hub model id or an
+# endpoint URL; a local directory path presumably will not resolve - confirm,
+# and consider serving the saved model through a local pipeline instead.
+client = InferenceClient("./fine_tuned_model")
 def respond(
         response += token
         yield response
 demo = gr.ChatInterface(
     respond,
     additional_inputs=[
 if __name__ == "__main__":
+    demo.launch()