Sunaina792 committed on
Commit
a6bdbee
·
verified ·
1 Parent(s): 025feca

Upload 2 files

Browse files
Files changed (2) hide show
  1. normal_to_formal.ipynb +331 -0
  2. normal_to_genz.ipynb +0 -0
normal_to_formal.ipynb ADDED
@@ -0,0 +1,331 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "cells": [
3
+ {
4
+ "cell_type": "code",
5
+ "execution_count": 2,
6
+ "metadata": {
7
+ "id": "4KDV129CjSUr"
8
+ },
9
+ "outputs": [
10
+ {
11
+ "data": {
12
+ "application/vnd.jupyter.widget-view+json": {
13
+ "model_id": "cfcbb81f755540bbbee503cce0b039eb",
14
+ "version_major": 2,
15
+ "version_minor": 0
16
+ },
17
+ "text/plain": [
18
+ "tokenizer_config.json: 0.00B [00:00, ?B/s]"
19
+ ]
20
+ },
21
+ "metadata": {},
22
+ "output_type": "display_data"
23
+ },
24
+ {
25
+ "data": {
26
+ "application/vnd.jupyter.widget-view+json": {
27
+ "model_id": "e2b5e0ddfc1741108fd7d92163bbea02",
28
+ "version_major": 2,
29
+ "version_minor": 0
30
+ },
31
+ "text/plain": [
32
+ "config.json: 0.00B [00:00, ?B/s]"
33
+ ]
34
+ },
35
+ "metadata": {},
36
+ "output_type": "display_data"
37
+ },
38
+ {
39
+ "data": {
40
+ "application/vnd.jupyter.widget-view+json": {
41
+ "model_id": "be20a20120464aa3964460614bc46c6b",
42
+ "version_major": 2,
43
+ "version_minor": 0
44
+ },
45
+ "text/plain": [
46
+ "spiece.model: 0%| | 0.00/792k [00:00<?, ?B/s]"
47
+ ]
48
+ },
49
+ "metadata": {},
50
+ "output_type": "display_data"
51
+ },
52
+ {
53
+ "data": {
54
+ "application/vnd.jupyter.widget-view+json": {
55
+ "model_id": "d0a80cf87a4a4035a09409c816e67b8f",
56
+ "version_major": 2,
57
+ "version_minor": 0
58
+ },
59
+ "text/plain": [
60
+ "tokenizer.json: 0.00B [00:00, ?B/s]"
61
+ ]
62
+ },
63
+ "metadata": {},
64
+ "output_type": "display_data"
65
+ },
66
+ {
67
+ "data": {
68
+ "application/vnd.jupyter.widget-view+json": {
69
+ "model_id": "4cc5466f071849e6a4b9338ddddfc7fc",
70
+ "version_major": 2,
71
+ "version_minor": 0
72
+ },
73
+ "text/plain": [
74
+ "special_tokens_map.json: 0.00B [00:00, ?B/s]"
75
+ ]
76
+ },
77
+ "metadata": {},
78
+ "output_type": "display_data"
79
+ },
80
+ {
81
+ "data": {
82
+ "application/vnd.jupyter.widget-view+json": {
83
+ "model_id": "c5db085240c84185a4fdeef9570873ff",
84
+ "version_major": 2,
85
+ "version_minor": 0
86
+ },
87
+ "text/plain": [
88
+ "pytorch_model.bin: 0%| | 0.00/892M [00:00<?, ?B/s]"
89
+ ]
90
+ },
91
+ "metadata": {},
92
+ "output_type": "display_data"
93
+ },
94
+ {
95
+ "data": {
96
+ "application/vnd.jupyter.widget-view+json": {
97
+ "model_id": "5ab33f6bec6e46e6a8159a42ef725590",
98
+ "version_major": 2,
99
+ "version_minor": 0
100
+ },
101
+ "text/plain": [
102
+ "model.safetensors: 0%| | 0.00/892M [00:00<?, ?B/s]"
103
+ ]
104
+ },
105
+ "metadata": {},
106
+ "output_type": "display_data"
107
+ },
108
+ {
109
+ "name": "stdout",
110
+ "output_type": "stream",
111
+ "text": [
112
+ "I am going to get that report now.\n",
113
+ "I love going to the movies.\n"
114
+ ]
115
+ }
116
+ ],
117
+ "source": [
118
+ "!pip install -q transformers torch\n",
119
+ "\n",
120
+ "from transformers import AutoTokenizer, AutoModelForSeq2SeqLM\n",
121
+ "import torch\n",
122
+ "\n",
123
+ "model_id = \"rajistics/informal_formal_style_transfer\"\n",
124
+ "\n",
125
+ "tokenizer = AutoTokenizer.from_pretrained(model_id)\n",
126
+ "model = AutoModelForSeq2SeqLM.from_pretrained(model_id)\n",
127
+ "device = \"cuda\" if torch.cuda.is_available() else \"cpu\"\n",
128
+ "model.to(device)\n",
129
+ "\n",
130
+ "def informal_to_formal(text, max_new_tokens=64, num_beams=4):\n",
131
+ " inputs = tokenizer(text, return_tensors=\"pt\").to(device)\n",
132
+ " with torch.no_grad():\n",
133
+ " outputs = model.generate(\n",
134
+ " **inputs,\n",
135
+ " max_new_tokens=max_new_tokens,\n",
136
+ " num_beams=num_beams,\n",
137
+ " early_stopping=True,\n",
138
+ " no_repeat_ngram_size=2,\n",
139
+ " )\n",
140
+ " return tokenizer.decode(outputs[0], skip_special_tokens=True).strip()\n",
141
+ "\n",
142
+ "# test\n",
143
+ "print(informal_to_formal(\"gimme that report now\"))\n",
144
+ "print(informal_to_formal(\"i loooooooooooooooooooooooove going to the movies.\"))\n"
145
+ ]
146
+ },
147
+ {
148
+ "cell_type": "code",
149
+ "execution_count": 3,
150
+ "metadata": {
151
+ "id": "hgYDbUJ3jleL"
152
+ },
153
+ "outputs": [],
154
+ "source": [
155
+ "def informal_to_formal_prefixed(text, **gen_kwargs):\n",
156
+ " prefixed = \"transfer Casual to Formal: \" + text\n",
157
+ " return informal_to_formal(prefixed, **gen_kwargs)\n"
158
+ ]
159
+ },
160
+ {
161
+ "cell_type": "code",
162
+ "execution_count": 4,
163
+ "metadata": {
164
+ "id": "K6KK6Rr2jwKt"
165
+ },
166
+ "outputs": [
167
+ {
168
+ "name": "stdout",
169
+ "output_type": "stream",
170
+ "text": [
171
+ "Colab notebook detected. To show errors in colab notebook, set debug=True in launch()\n",
172
+ "* Running on public URL: https://591bd78c0ee0426622.gradio.live\n",
173
+ "\n",
174
+ "This share link expires in 1 week. For free permanent hosting and GPU upgrades, run `gradio deploy` from the terminal in the working directory to deploy to Hugging Face Spaces (https://huggingface.co/spaces)\n"
175
+ ]
176
+ },
177
+ {
178
+ "data": {
179
+ "text/html": [
180
+ "<div><iframe src=\"https://591bd78c0ee0426622.gradio.live\" width=\"100%\" height=\"500\" allow=\"autoplay; camera; microphone; clipboard-read; clipboard-write;\" frameborder=\"0\" allowfullscreen></iframe></div>"
181
+ ],
182
+ "text/plain": [
183
+ "<IPython.core.display.HTML object>"
184
+ ]
185
+ },
186
+ "metadata": {},
187
+ "output_type": "display_data"
188
+ },
189
+ {
190
+ "data": {
191
+ "text/plain": []
192
+ },
193
+ "execution_count": 4,
194
+ "metadata": {},
195
+ "output_type": "execute_result"
196
+ }
197
+ ],
198
+ "source": [
199
+ "import gradio as gr\n",
200
+ "\n",
201
+ "def formal_interface(text, max_len, beams):\n",
202
+ " return informal_to_formal(text, max_new_tokens=int(max_len), num_beams=int(beams))\n",
203
+ "\n",
204
+ "demo = gr.Interface(\n",
205
+ " fn=formal_interface,\n",
206
+ " inputs=[\n",
207
+ " gr.Textbox(lines=3, label=\"Informal text\"),\n",
208
+ " gr.Slider(16, 128, value=64, step=4, label=\"Max new tokens\"),\n",
209
+ " gr.Slider(1, 8, value=4, step=1, label=\"Beams\"),\n",
210
+ " ],\n",
211
+ " outputs=gr.Textbox(label=\"Formal text\"),\n",
212
+ " title=\"Informal ➜ Formal \",\n",
213
+ ")\n",
214
+ "\n",
215
+ "demo.launch(share=True)\n"
216
+ ]
217
+ },
218
+ {
219
+ "cell_type": "code",
220
+ "execution_count": 5,
221
+ "metadata": {
222
+ "id": "OGUU73oqj1gn"
223
+ },
224
+ "outputs": [
225
+ {
226
+ "name": "stdout",
227
+ "output_type": "stream",
228
+ "text": [
229
+ "Model saved to: my_formal_t5_model\n"
230
+ ]
231
+ }
232
+ ],
233
+ "source": [
234
+ "model.save_pretrained(\"my_formal_t5_model\")\n",
235
+ "print(\"Model saved to: my_formal_t5_model\")"
236
+ ]
237
+ },
238
+ {
239
+ "cell_type": "code",
240
+ "execution_count": null,
241
+ "metadata": {},
242
+ "outputs": [],
243
+ "source": []
244
+ },
245
+ {
246
+ "cell_type": "code",
247
+ "execution_count": null,
248
+ "metadata": {
249
+ "id": "tASct-9QlqNk"
250
+ },
251
+ "outputs": [],
252
+ "source": [
253
+ "# Alternative 1: Save only model weights (state_dict)\n",
254
+ "import torch\n",
255
+ "torch.save(model.state_dict(), \"formal_model_weights.pth\")\n",
256
+ "print(\"Model weights saved to: formal_model_weights.pth\")\n",
257
+ "\n",
258
+ "# To load later:\n",
259
+ "# model = AutoModelForSeq2SeqLM.from_pretrained(model_id)\n",
260
+ "# model.load_state_dict(torch.load(\"formal_model_weights.pth\"))\n",
261
+ "# model.to(device)"
262
+ ]
263
+ },
264
+ {
265
+ "cell_type": "code",
266
+ "execution_count": null,
267
+ "metadata": {},
268
+ "outputs": [],
269
+ "source": [
270
+ "# Alternative 2: Save in SafeTensors format (more secure and faster loading)\n",
271
+ "try:\n",
272
+ " from safetensors.torch import save_file\n",
273
+ " save_file(model.state_dict(), \"formal_model_weights.safetensors\")\n",
274
+ " print(\"Model saved in SafeTensors format: formal_model_weights.safetensors\")\n",
275
+ "except ImportError:\n",
276
+ " print(\"SafeTensors not installed. Install with: pip install safetensors\")\n",
277
+ "\n",
278
+ "# To load SafeTensors:\n",
279
+ "# from safetensors.torch import load_file\n",
280
+ "# state_dict = load_file(\"formal_model_weights.safetensors\")\n",
281
+ "# model.load_state_dict(state_dict)"
282
+ ]
283
+ },
284
+ {
285
+ "cell_type": "code",
286
+ "execution_count": null,
287
+ "metadata": {},
288
+ "outputs": [],
289
+ "source": [
290
+ "# Alternative 3: Save model and tokenizer to a custom directory\n",
291
+ "model.save_pretrained(\"./my_custom_formal_model\")\n",
292
+ "tokenizer.save_pretrained(\"./my_custom_formal_model\")\n",
293
+ "print(\"Model and tokenizer saved to: ./my_custom_formal_model/\")\n",
294
+ "\n",
295
+ "# Alternative 4: Push to Hugging Face Hub (requires huggingface_hub)\n",
296
+ "# from huggingface_hub import login\n",
297
+ "# login() # You'll need to authenticate\n",
298
+ "# model.push_to_hub(\"your-username/formal-style-transfer-model\")\n",
299
+ "# tokenizer.push_to_hub(\"your-username/formal-style-transfer-model\")\n",
300
+ "# print(\"Model pushed to Hugging Face Hub\")"
301
+ ]
302
+ }
303
+ ],
304
+ "metadata": {
305
+ "accelerator": "GPU",
306
+ "colab": {
307
+ "gpuType": "T4",
308
+ "private_outputs": true,
309
+ "provenance": []
310
+ },
311
+ "kernelspec": {
312
+ "display_name": "Python 3 (ipykernel)",
313
+ "language": "python",
314
+ "name": "python3"
315
+ },
316
+ "language_info": {
317
+ "codemirror_mode": {
318
+ "name": "ipython",
319
+ "version": 3
320
+ },
321
+ "file_extension": ".py",
322
+ "mimetype": "text/x-python",
323
+ "name": "python",
324
+ "nbconvert_exporter": "python",
325
+ "pygments_lexer": "ipython3",
326
+ "version": "3.12.12"
327
+ }
328
+ },
329
+ "nbformat": 4,
330
+ "nbformat_minor": 0
331
+ }
normal_to_genz.ipynb ADDED
The diff for this file is too large to render. See raw diff