Spaces:
Build error
Build error
Upload 12 files
Browse files- app.py +24 -0
- fine_tuned_model (1).zip +3 -0
- fine_tuned_model (1)/config.json +39 -0
- fine_tuned_model (1)/generation_config.json +6 -0
- fine_tuned_model (1)/merges.txt +0 -0
- fine_tuned_model (1)/model.safetensors +3 -0
- fine_tuned_model (1)/special_tokens_map.json +6 -0
- fine_tuned_model (1)/tokenizer.json +0 -0
- fine_tuned_model (1)/tokenizer_config.json +21 -0
- fine_tuned_model (1)/vocab.json +0 -0
- requirements.txt +6 -0
- text_generation_finetunning_notebook.ipynb +290 -0
app.py
ADDED
|
@@ -0,0 +1,24 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import streamlit as st
|
| 2 |
+
from transformers import AutoModelForCausalLM, AutoTokenizer,pipeline
|
| 3 |
+
import torch
|
| 4 |
+
|
| 5 |
+
st.title("Text_Generator Fine tunning model")

# Directory (relative to the working directory) that holds the fine-tuned
# model weights and tokenizer files.
model_dir = "fine_tuned_model (1)"


@st.cache_resource
def _load_generation_stack(path):
    """Load the tokenizer, model and text-generation pipeline from *path*.

    Streamlit re-executes this script from top to bottom on every widget
    interaction. Decorating the loader with ``st.cache_resource`` keeps a
    single loaded copy per server process, so the checkpoint is read from
    disk once instead of on every rerun.

    Args:
        path: Directory passed to ``from_pretrained`` for both the tokenizer
            and the causal language model.

    Returns:
        A ``(tokenizer, model, generator)`` tuple, where ``generator`` is the
        ``"text-generation"`` pipeline built from the other two objects.
    """
    loaded_tokenizer = AutoTokenizer.from_pretrained(path)
    loaded_model = AutoModelForCausalLM.from_pretrained(path)
    generator = pipeline(
        "text-generation", model=loaded_model, tokenizer=loaded_tokenizer
    )
    return loaded_tokenizer, loaded_model, generator


# Keep the original module-level names so the rest of the script is unchanged.
tokenizer, model, code_generator = _load_generation_stack(model_dir)

# Prompt box, pre-filled with a sample prompt.
inputs_text = st.text_input("Please enter the text", value="def quicksort(arr):")

# Generate only when the button is pressed; the pipeline returns a list with
# one dict per requested sequence, and the text is under "generated_text".
if st.button("submit"):
    generated_code = code_generator(inputs_text, max_length=200, num_return_sequences=1)
    st.write(generated_code[0]["generated_text"])
|
| 24 |
+
|
fine_tuned_model (1).zip
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:e571aaa5e03efcbab67c083fb3884631f00fec87b86aeef60a6dbc298b4ed31a
|
| 3 |
+
size 463917526
|
fine_tuned_model (1)/config.json
ADDED
|
@@ -0,0 +1,39 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"_name_or_path": "gpt2",
|
| 3 |
+
"activation_function": "gelu_new",
|
| 4 |
+
"architectures": [
|
| 5 |
+
"GPT2LMHeadModel"
|
| 6 |
+
],
|
| 7 |
+
"attn_pdrop": 0.1,
|
| 8 |
+
"bos_token_id": 50256,
|
| 9 |
+
"embd_pdrop": 0.1,
|
| 10 |
+
"eos_token_id": 50256,
|
| 11 |
+
"initializer_range": 0.02,
|
| 12 |
+
"layer_norm_epsilon": 1e-05,
|
| 13 |
+
"model_type": "gpt2",
|
| 14 |
+
"n_ctx": 1024,
|
| 15 |
+
"n_embd": 768,
|
| 16 |
+
"n_head": 12,
|
| 17 |
+
"n_inner": null,
|
| 18 |
+
"n_layer": 12,
|
| 19 |
+
"n_positions": 1024,
|
| 20 |
+
"reorder_and_upcast_attn": false,
|
| 21 |
+
"resid_pdrop": 0.1,
|
| 22 |
+
"scale_attn_by_inverse_layer_idx": false,
|
| 23 |
+
"scale_attn_weights": true,
|
| 24 |
+
"summary_activation": null,
|
| 25 |
+
"summary_first_dropout": 0.1,
|
| 26 |
+
"summary_proj_to_labels": true,
|
| 27 |
+
"summary_type": "cls_index",
|
| 28 |
+
"summary_use_proj": true,
|
| 29 |
+
"task_specific_params": {
|
| 30 |
+
"text-generation": {
|
| 31 |
+
"do_sample": true,
|
| 32 |
+
"max_length": 50
|
| 33 |
+
}
|
| 34 |
+
},
|
| 35 |
+
"torch_dtype": "float32",
|
| 36 |
+
"transformers_version": "4.49.0",
|
| 37 |
+
"use_cache": true,
|
| 38 |
+
"vocab_size": 50257
|
| 39 |
+
}
|
fine_tuned_model (1)/generation_config.json
ADDED
|
@@ -0,0 +1,6 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"_from_model_config": true,
|
| 3 |
+
"bos_token_id": 50256,
|
| 4 |
+
"eos_token_id": 50256,
|
| 5 |
+
"transformers_version": "4.49.0"
|
| 6 |
+
}
|
fine_tuned_model (1)/merges.txt
ADDED
|
The diff for this file is too large to render.
See raw diff
|
|
|
fine_tuned_model (1)/model.safetensors
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:e5329d3767b696ddf346aae65954b3679aa13b71d1d4a577be2a1b1e5cfdf7d0
|
| 3 |
+
size 497774208
|
fine_tuned_model (1)/special_tokens_map.json
ADDED
|
@@ -0,0 +1,6 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"bos_token": "<|endoftext|>",
|
| 3 |
+
"eos_token": "<|endoftext|>",
|
| 4 |
+
"pad_token": "<|endoftext|>",
|
| 5 |
+
"unk_token": "<|endoftext|>"
|
| 6 |
+
}
|
fine_tuned_model (1)/tokenizer.json
ADDED
|
The diff for this file is too large to render.
See raw diff
|
|
|
fine_tuned_model (1)/tokenizer_config.json
ADDED
|
@@ -0,0 +1,21 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"add_prefix_space": false,
|
| 3 |
+
"added_tokens_decoder": {
|
| 4 |
+
"50256": {
|
| 5 |
+
"content": "<|endoftext|>",
|
| 6 |
+
"lstrip": false,
|
| 7 |
+
"normalized": true,
|
| 8 |
+
"rstrip": false,
|
| 9 |
+
"single_word": false,
|
| 10 |
+
"special": true
|
| 11 |
+
}
|
| 12 |
+
},
|
| 13 |
+
"bos_token": "<|endoftext|>",
|
| 14 |
+
"clean_up_tokenization_spaces": false,
|
| 15 |
+
"eos_token": "<|endoftext|>",
|
| 16 |
+
"extra_special_tokens": {},
|
| 17 |
+
"model_max_length": 1024,
|
| 18 |
+
"pad_token": "<|endoftext|>",
|
| 19 |
+
"tokenizer_class": "GPT2Tokenizer",
|
| 20 |
+
"unk_token": "<|endoftext|>"
|
| 21 |
+
}
|
fine_tuned_model (1)/vocab.json
ADDED
|
The diff for this file is too large to render.
See raw diff
|
|
|
requirements.txt
ADDED
|
@@ -0,0 +1,6 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
transformers
|
| 2 |
+
pandas==2.2.2
|
| 3 |
+
torch==2.5.1
|
| 4 |
+
transformers==4.48.3
|
| 5 |
+
streamlit==1.41.1
|
| 6 |
+
bitsandbytes==0.45.3
|
text_generation_finetunning_notebook.ipynb
ADDED
|
@@ -0,0 +1,290 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"cells": [
|
| 3 |
+
{
|
| 4 |
+
"cell_type": "code",
|
| 5 |
+
"execution_count": 6,
|
| 6 |
+
"id": "9665f082-b1e2-4094-a9c4-f5fa4560e01f",
|
| 7 |
+
"metadata": {},
|
| 8 |
+
"outputs": [],
|
| 9 |
+
"source": [
|
| 10 |
+
"from transformers import AutoModelForCausalLM, AutoTokenizer\n",
|
| 11 |
+
"\n",
|
| 12 |
+
"model_name = \"gpt2\" \n",
|
| 13 |
+
"model = AutoModelForCausalLM.from_pretrained(model_name)\n",
|
| 14 |
+
"tokenizer = AutoTokenizer.from_pretrained(model_name)\n",
|
| 15 |
+
"\n",
|
| 16 |
+
"# Ensure the tokenizer uses padding if necessary\n",
|
| 17 |
+
"tokenizer.pad_token = tokenizer.eos_token \n"
|
| 18 |
+
]
|
| 19 |
+
},
|
| 20 |
+
{
|
| 21 |
+
"cell_type": "code",
|
| 22 |
+
"execution_count": 7,
|
| 23 |
+
"id": "8c81406c-1335-4491-b8cd-67770e86e390",
|
| 24 |
+
"metadata": {},
|
| 25 |
+
"outputs": [],
|
| 26 |
+
"source": [
|
| 27 |
+
"from datasets import load_dataset\n",
|
| 28 |
+
"\n",
|
| 29 |
+
"dataset = load_dataset(\"wikitext\", \"wikitext-2-raw-v1\")\n"
|
| 30 |
+
]
|
| 31 |
+
},
|
| 32 |
+
{
|
| 33 |
+
"cell_type": "code",
|
| 34 |
+
"execution_count": 8,
|
| 35 |
+
"id": "2fd0c7d7-1c01-416c-af00-2d11a51663f1",
|
| 36 |
+
"metadata": {},
|
| 37 |
+
"outputs": [
|
| 38 |
+
{
|
| 39 |
+
"data": {
|
| 40 |
+
"application/vnd.jupyter.widget-view+json": {
|
| 41 |
+
"model_id": "833d3e6bacf94b4f83849b76e554c187",
|
| 42 |
+
"version_major": 2,
|
| 43 |
+
"version_minor": 0
|
| 44 |
+
},
|
| 45 |
+
"text/plain": [
|
| 46 |
+
"Map: 0%| | 0/36718 [00:00<?, ? examples/s]"
|
| 47 |
+
]
|
| 48 |
+
},
|
| 49 |
+
"metadata": {},
|
| 50 |
+
"output_type": "display_data"
|
| 51 |
+
}
|
| 52 |
+
],
|
| 53 |
+
"source": [
|
| 54 |
+
"def tokenize_function(examples):\n",
|
| 55 |
+
" return tokenizer(examples[\"text\"], truncation=True, padding=\"max_length\", max_length=512)\n",
|
| 56 |
+
"\n",
|
| 57 |
+
"tokenized_datasets = dataset.map(tokenize_function, batched=True)\n"
|
| 58 |
+
]
|
| 59 |
+
},
|
| 60 |
+
{
|
| 61 |
+
"cell_type": "code",
|
| 62 |
+
"execution_count": 9,
|
| 63 |
+
"id": "85a7f1be-a72d-4b94-b232-4942616810f9",
|
| 64 |
+
"metadata": {},
|
| 65 |
+
"outputs": [
|
| 66 |
+
{
|
| 67 |
+
"name": "stderr",
|
| 68 |
+
"output_type": "stream",
|
| 69 |
+
"text": [
|
| 70 |
+
"/home/zeus/miniconda3/envs/cloudspace/lib/python3.10/site-packages/transformers/training_args.py:1594: FutureWarning: `evaluation_strategy` is deprecated and will be removed in version 4.46 of 🤗 Transformers. Use `eval_strategy` instead\n",
|
| 71 |
+
" warnings.warn(\n"
|
| 72 |
+
]
|
| 73 |
+
}
|
| 74 |
+
],
|
| 75 |
+
"source": [
|
| 76 |
+
"from transformers import TrainingArguments\n",
|
| 77 |
+
"\n",
|
| 78 |
+
"training_args = TrainingArguments(\n",
|
| 79 |
+
" output_dir=\"./results\",\n",
|
| 80 |
+
" evaluation_strategy=\"epoch\",\n",
|
| 81 |
+
" save_strategy=\"epoch\",\n",
|
| 82 |
+
" per_device_train_batch_size=8, # Adjust based on your GPU\n",
|
| 83 |
+
" per_device_eval_batch_size=8,\n",
|
| 84 |
+
" logging_dir=\"./logs\",\n",
|
| 85 |
+
" logging_steps=10,\n",
|
| 86 |
+
" num_train_epochs=1,\n",
|
| 87 |
+
" report_to=\"none\", # Change to \"wandb\" or \"tensorboard\" if using logging\n",
|
| 88 |
+
")\n"
|
| 89 |
+
]
|
| 90 |
+
},
|
| 91 |
+
{
|
| 92 |
+
"cell_type": "code",
|
| 93 |
+
"execution_count": 10,
|
| 94 |
+
"id": "cb46a328-74ef-420a-b5d7-b3159cc8f5b0",
|
| 95 |
+
"metadata": {},
|
| 96 |
+
"outputs": [
|
| 97 |
+
{
|
| 98 |
+
"data": {
|
| 99 |
+
"text/html": [
|
| 100 |
+
"\n",
|
| 101 |
+
" <div>\n",
|
| 102 |
+
" \n",
|
| 103 |
+
" <progress value='4590' max='4590' style='width:300px; height:20px; vertical-align: middle;'></progress>\n",
|
| 104 |
+
" [4590/4590 1:19:10, Epoch 1/1]\n",
|
| 105 |
+
" </div>\n",
|
| 106 |
+
" <table border=\"1\" class=\"dataframe\">\n",
|
| 107 |
+
" <thead>\n",
|
| 108 |
+
" <tr style=\"text-align: left;\">\n",
|
| 109 |
+
" <th>Epoch</th>\n",
|
| 110 |
+
" <th>Training Loss</th>\n",
|
| 111 |
+
" <th>Validation Loss</th>\n",
|
| 112 |
+
" </tr>\n",
|
| 113 |
+
" </thead>\n",
|
| 114 |
+
" <tbody>\n",
|
| 115 |
+
" <tr>\n",
|
| 116 |
+
" <td>1</td>\n",
|
| 117 |
+
" <td>3.239600</td>\n",
|
| 118 |
+
" <td>3.291132</td>\n",
|
| 119 |
+
" </tr>\n",
|
| 120 |
+
" </tbody>\n",
|
| 121 |
+
"</table><p>"
|
| 122 |
+
],
|
| 123 |
+
"text/plain": [
|
| 124 |
+
"<IPython.core.display.HTML object>"
|
| 125 |
+
]
|
| 126 |
+
},
|
| 127 |
+
"metadata": {},
|
| 128 |
+
"output_type": "display_data"
|
| 129 |
+
},
|
| 130 |
+
{
|
| 131 |
+
"data": {
|
| 132 |
+
"text/plain": [
|
| 133 |
+
"TrainOutput(global_step=4590, training_loss=3.347612351062251, metrics={'train_runtime': 4751.264, 'train_samples_per_second': 7.728, 'train_steps_per_second': 0.966, 'total_flos': 9594120830976000.0, 'train_loss': 3.347612351062251, 'epoch': 1.0})"
|
| 134 |
+
]
|
| 135 |
+
},
|
| 136 |
+
"execution_count": 10,
|
| 137 |
+
"metadata": {},
|
| 138 |
+
"output_type": "execute_result"
|
| 139 |
+
}
|
| 140 |
+
],
|
| 141 |
+
"source": [
|
| 142 |
+
"from transformers import Trainer, TrainingArguments, DataCollatorForLanguageModeling\n",
|
| 143 |
+
"\n",
|
| 144 |
+
"data_collator = DataCollatorForLanguageModeling(tokenizer=tokenizer, mlm=False)\n",
|
| 145 |
+
"\n",
|
| 146 |
+
"trainer = Trainer(\n",
|
| 147 |
+
" model=model,\n",
|
| 148 |
+
" args=training_args,\n",
|
| 149 |
+
" train_dataset=tokenized_datasets[\"train\"],\n",
|
| 150 |
+
" eval_dataset=tokenized_datasets[\"validation\"],\n",
|
| 151 |
+
" data_collator=data_collator,\n",
|
| 152 |
+
")\n",
|
| 153 |
+
"\n",
|
| 154 |
+
"trainer.train()\n"
|
| 155 |
+
]
|
| 156 |
+
},
|
| 157 |
+
{
|
| 158 |
+
"cell_type": "code",
|
| 159 |
+
"execution_count": 11,
|
| 160 |
+
"id": "d257f423-a9ea-4fe2-9fcf-bebcf1cd356d",
|
| 161 |
+
"metadata": {},
|
| 162 |
+
"outputs": [
|
| 163 |
+
{
|
| 164 |
+
"data": {
|
| 165 |
+
"text/plain": [
|
| 166 |
+
"('fine_tuned_model/tokenizer_config.json',\n",
|
| 167 |
+
" 'fine_tuned_model/special_tokens_map.json',\n",
|
| 168 |
+
" 'fine_tuned_model/vocab.json',\n",
|
| 169 |
+
" 'fine_tuned_model/merges.txt',\n",
|
| 170 |
+
" 'fine_tuned_model/added_tokens.json',\n",
|
| 171 |
+
" 'fine_tuned_model/tokenizer.json')"
|
| 172 |
+
]
|
| 173 |
+
},
|
| 174 |
+
"execution_count": 11,
|
| 175 |
+
"metadata": {},
|
| 176 |
+
"output_type": "execute_result"
|
| 177 |
+
}
|
| 178 |
+
],
|
| 179 |
+
"source": [
|
| 180 |
+
"model.save_pretrained(\"fine_tuned_model\")\n",
|
| 181 |
+
"tokenizer.save_pretrained(\"fine_tuned_model\")"
|
| 182 |
+
]
|
| 183 |
+
},
|
| 184 |
+
{
|
| 185 |
+
"cell_type": "code",
|
| 186 |
+
"execution_count": 12,
|
| 187 |
+
"id": "493e4e36-45a6-4cd2-b37d-2e8e534f1a39",
|
| 188 |
+
"metadata": {},
|
| 189 |
+
"outputs": [
|
| 190 |
+
{
|
| 191 |
+
"name": "stdout",
|
| 192 |
+
"output_type": "stream",
|
| 193 |
+
"text": [
|
| 194 |
+
"Folder 'fine_tuned_model' has been zipped as 'fine_tuned_model.zip'.\n"
|
| 195 |
+
]
|
| 196 |
+
}
|
| 197 |
+
],
|
| 198 |
+
"source": [
|
| 199 |
+
"import shutil\n",
|
| 200 |
+
"\n",
|
| 201 |
+
"# Specify the folder to be zipped\n",
|
| 202 |
+
"folder_path = \"fine_tuned_model\" # Replace with your actual folder name\n",
|
| 203 |
+
"zip_name = \"fine_tuned_model.zip\" # Desired zip file name\n",
|
| 204 |
+
"\n",
|
| 205 |
+
"# Create a zip archive\n",
|
| 206 |
+
"shutil.make_archive(zip_name.replace('.zip', ''), 'zip', folder_path)\n",
|
| 207 |
+
"\n",
|
| 208 |
+
"print(f\"Folder '{folder_path}' has been zipped as '{zip_name}'.\")"
|
| 209 |
+
]
|
| 210 |
+
},
|
| 211 |
+
{
|
| 212 |
+
"cell_type": "code",
|
| 213 |
+
"execution_count": 18,
|
| 214 |
+
"id": "fda9cf8b-1e3c-47c2-8a60-11cccf2d608a",
|
| 215 |
+
"metadata": {},
|
| 216 |
+
"outputs": [],
|
| 217 |
+
"source": [
|
| 218 |
+
"from transformers import pipeline"
|
| 219 |
+
]
|
| 220 |
+
},
|
| 221 |
+
{
|
| 222 |
+
"cell_type": "code",
|
| 223 |
+
"execution_count": 20,
|
| 224 |
+
"id": "d60aa595-6bff-4686-a9ba-3e9b993a54ed",
|
| 225 |
+
"metadata": {},
|
| 226 |
+
"outputs": [
|
| 227 |
+
{
|
| 228 |
+
"name": "stderr",
|
| 229 |
+
"output_type": "stream",
|
| 230 |
+
"text": [
|
| 231 |
+
"Device set to use cuda:0\n",
|
| 232 |
+
"Truncation was not explicitly activated but `max_length` is provided a specific value, please use `truncation=True` to explicitly truncate examples to max length. Defaulting to 'longest_first' truncation strategy. If you encode pairs of sequences (GLUE-style) with the tokenizer you can select this strategy more precisely by providing a specific strategy to `truncation`.\n"
|
| 233 |
+
]
|
| 234 |
+
},
|
| 235 |
+
{
|
| 236 |
+
"name": "stdout",
|
| 237 |
+
"output_type": "stream",
|
| 238 |
+
"text": [
|
| 239 |
+
"def quicksort(arr): \n",
|
| 240 |
+
"\n",
|
| 241 |
+
"Proscure = \n",
|
| 242 |
+
"\n",
|
| 243 |
+
"Faced with a choice between the current and previous values, an error's resolution in a new value is not necessarily in order, since the first one is the first one that does not change. Prof will have to return a retry call for all possible errors returned from the previous value, which is equivalent to a new retry ( q @-@ f ). A simple recursion will perform only one recursion on the results. \n",
|
| 244 |
+
"\n",
|
| 245 |
+
"A recursion in alliter @-@ ordered values is done if it's possible to reorder them at all. This means a recursion in the first function of an array's contents is done if it isn 't possible to reorder them at all. This means, for example, that an array would have to be returned the same number of times in order to work as an array is. \n",
|
| 246 |
+
"\n",
|
| 247 |
+
"A recursion in\n"
|
| 248 |
+
]
|
| 249 |
+
}
|
| 250 |
+
],
|
| 251 |
+
"source": [
|
| 252 |
+
"code_generator = pipeline(\"text-generation\", model=\"fine_tuned_model\", tokenizer=tokenizer)\n",
|
| 253 |
+
"\n",
|
| 254 |
+
"prompt = \"def quicksort(arr):\"\n",
|
| 255 |
+
"generated_code = code_generator(prompt, max_length=200, num_return_sequences=1)\n",
|
| 256 |
+
"\n",
|
| 257 |
+
"print(generated_code[0][\"generated_text\"])"
|
| 258 |
+
]
|
| 259 |
+
},
|
| 260 |
+
{
|
| 261 |
+
"cell_type": "code",
|
| 262 |
+
"execution_count": null,
|
| 263 |
+
"id": "7c82d049-147d-49e0-bc87-b7793c01dba1",
|
| 264 |
+
"metadata": {},
|
| 265 |
+
"outputs": [],
|
| 266 |
+
"source": []
|
| 267 |
+
}
|
| 268 |
+
],
|
| 269 |
+
"metadata": {
|
| 270 |
+
"kernelspec": {
|
| 271 |
+
"display_name": "Python 3",
|
| 272 |
+
"language": "python",
|
| 273 |
+
"name": "python3"
|
| 274 |
+
},
|
| 275 |
+
"language_info": {
|
| 276 |
+
"codemirror_mode": {
|
| 277 |
+
"name": "ipython",
|
| 278 |
+
"version": 3
|
| 279 |
+
},
|
| 280 |
+
"file_extension": ".py",
|
| 281 |
+
"mimetype": "text/x-python",
|
| 282 |
+
"name": "python",
|
| 283 |
+
"nbconvert_exporter": "python",
|
| 284 |
+
"pygments_lexer": "ipython3",
|
| 285 |
+
"version": "3.10.10"
|
| 286 |
+
}
|
| 287 |
+
},
|
| 288 |
+
"nbformat": 4,
|
| 289 |
+
"nbformat_minor": 5
|
| 290 |
+
}
|