Spaces:
Runtime error
Delete app.ipynb
app.ipynb DELETED
@@ -1,204 +0,0 @@
-{
- "cells": [
-  {
-   "cell_type": "code",
-   "execution_count": 38,
-   "metadata": {},
-   "outputs": [
-    {
-     "name": "stderr",
-     "output_type": "stream",
-     "text": [
-      "c:\\Users\\Srujan Jujare\\AppData\\Local\\Programs\\Python\\Python311\\Lib\\site-packages\\tqdm\\auto.py:21: TqdmWarning: IProgress not found. Please update jupyter and ipywidgets. See https://ipywidgets.readthedocs.io/en/stable/user_install.html\n",
-      "  from .autonotebook import tqdm as notebook_tqdm\n"
-     ]
-    }
-   ],
-   "source": [
-    "from transformers import VisionEncoderDecoderModel, ViTImageProcessor, AutoTokenizer\n",
-    "import torch\n",
-    "from PIL import Image"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 39,
-   "metadata": {},
-   "outputs": [
-    {
-     "data": {
-      "text/plain": [
-       "VisionEncoderDecoderModel(\n",
-       "  (encoder): ViTModel(\n",
-       "    (embeddings): ViTEmbeddings(\n",
-       "      (patch_embeddings): ViTPatchEmbeddings(\n",
-       "        (projection): Conv2d(3, 768, kernel_size=(16, 16), stride=(16, 16))\n",
-       "      )\n",
-       "      (dropout): Dropout(p=0.0, inplace=False)\n",
-       "    )\n",
-       "    (encoder): ViTEncoder(\n",
-       "      (layer): ModuleList(\n",
-       "        (0-11): 12 x ViTLayer(\n",
-       "          (attention): ViTAttention(\n",
-       "            (attention): ViTSelfAttention(\n",
-       "              (query): Linear(in_features=768, out_features=768, bias=True)\n",
-       "              (key): Linear(in_features=768, out_features=768, bias=True)\n",
-       "              (value): Linear(in_features=768, out_features=768, bias=True)\n",
-       "              (dropout): Dropout(p=0.0, inplace=False)\n",
-       "            )\n",
-       "            (output): ViTSelfOutput(\n",
-       "              (dense): Linear(in_features=768, out_features=768, bias=True)\n",
-       "              (dropout): Dropout(p=0.0, inplace=False)\n",
-       "            )\n",
-       "          )\n",
-       "          (intermediate): ViTIntermediate(\n",
-       "            (dense): Linear(in_features=768, out_features=3072, bias=True)\n",
-       "            (intermediate_act_fn): GELUActivation()\n",
-       "          )\n",
-       "          (output): ViTOutput(\n",
-       "            (dense): Linear(in_features=3072, out_features=768, bias=True)\n",
-       "            (dropout): Dropout(p=0.0, inplace=False)\n",
-       "          )\n",
-       "          (layernorm_before): LayerNorm((768,), eps=1e-12, elementwise_affine=True)\n",
-       "          (layernorm_after): LayerNorm((768,), eps=1e-12, elementwise_affine=True)\n",
-       "        )\n",
-       "      )\n",
-       "    )\n",
-       "    (layernorm): LayerNorm((768,), eps=1e-12, elementwise_affine=True)\n",
-       "    (pooler): ViTPooler(\n",
-       "      (dense): Linear(in_features=768, out_features=768, bias=True)\n",
-       "      (activation): Tanh()\n",
-       "    )\n",
-       "  )\n",
-       "  (decoder): GPT2LMHeadModel(\n",
-       "    (transformer): GPT2Model(\n",
-       "      (wte): Embedding(50257, 768)\n",
-       "      (wpe): Embedding(1024, 768)\n",
-       "      (drop): Dropout(p=0.1, inplace=False)\n",
-       "      (h): ModuleList(\n",
-       "        (0-11): 12 x GPT2Block(\n",
-       "          (ln_1): LayerNorm((768,), eps=1e-05, elementwise_affine=True)\n",
-       "          (attn): GPT2Attention(\n",
-       "            (c_attn): Conv1D()\n",
-       "            (c_proj): Conv1D()\n",
-       "            (attn_dropout): Dropout(p=0.1, inplace=False)\n",
-       "            (resid_dropout): Dropout(p=0.1, inplace=False)\n",
-       "          )\n",
-       "          (ln_2): LayerNorm((768,), eps=1e-05, elementwise_affine=True)\n",
-       "          (crossattention): GPT2Attention(\n",
-       "            (c_attn): Conv1D()\n",
-       "            (q_attn): Conv1D()\n",
-       "            (c_proj): Conv1D()\n",
-       "            (attn_dropout): Dropout(p=0.1, inplace=False)\n",
-       "            (resid_dropout): Dropout(p=0.1, inplace=False)\n",
-       "          )\n",
-       "          (ln_cross_attn): LayerNorm((768,), eps=1e-05, elementwise_affine=True)\n",
-       "          (mlp): GPT2MLP(\n",
-       "            (c_fc): Conv1D()\n",
-       "            (c_proj): Conv1D()\n",
-       "            (act): NewGELUActivation()\n",
-       "            (dropout): Dropout(p=0.1, inplace=False)\n",
-       "          )\n",
-       "        )\n",
-       "      )\n",
-       "      (ln_f): LayerNorm((768,), eps=1e-05, elementwise_affine=True)\n",
-       "    )\n",
-       "    (lm_head): Linear(in_features=768, out_features=50257, bias=False)\n",
-       "  )\n",
-       ")"
-      ]
-     },
-     "execution_count": 39,
-     "metadata": {},
-     "output_type": "execute_result"
-    }
-   ],
-   "source": [
-    "model = VisionEncoderDecoderModel.from_pretrained(\"nlpconnect/vit-gpt2-image-captioning\")\n",
-    "feature_extractor = ViTImageProcessor.from_pretrained(\"nlpconnect/vit-gpt2-image-captioning\")\n",
-    "tokenizer = AutoTokenizer.from_pretrained(\"nlpconnect/vit-gpt2-image-captioning\")\n",
-    "\n",
-    "device = torch.device(\"cuda\" if torch.cuda.is_available() else \"cpu\")\n",
-    "model.to(device)\n"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 40,
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "max_length = 16\n",
-    "num_beams = 4\n",
-    "gen_kwargs = {\"max_length\": max_length, \"num_beams\": num_beams}\n",
-    "def predict_step(image_paths):\n",
-    "  images = []\n",
-    "  for image_path in image_paths:\n",
-    "    i_image = Image.open(image_path)\n",
-    "    if i_image.mode != \"RGB\":\n",
-    "      i_image = i_image.convert(mode=\"RGB\")\n",
-    "\n",
-    "    images.append(i_image)\n",
-    "\n",
-    "  pixel_values = feature_extractor(images=images, return_tensors=\"pt\").pixel_values\n",
-    "  pixel_values = pixel_values.to(device)\n",
-    "\n",
-    "  output_ids = model.generate(pixel_values, **gen_kwargs)\n",
-    "\n",
-    "  preds = tokenizer.batch_decode(output_ids, skip_special_tokens=True)\n",
-    "  preds = [pred.strip() for pred in preds]\n",
-    "  return preds"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 41,
-   "metadata": {},
-   "outputs": [
-    {
-     "name": "stderr",
-     "output_type": "stream",
-     "text": [
-      "We strongly recommend passing in an `attention_mask` since your input_ids may be padded. See https://huggingface.co/docs/transformers/troubleshooting#incorrect-output-when-padding-tokens-arent-masked.\n",
-      "You may ignore this warning if your `pad_token_id` (50256) is identical to the `bos_token_id` (50256), `eos_token_id` (50256), or the `sep_token_id` (None), and your input is not padded.\n"
-     ]
-    },
-    {
-     "data": {
-      "text/plain": [
-       "['a clock on a dashboard of a car']"
-      ]
-     },
-     "execution_count": 41,
-     "metadata": {},
-     "output_type": "execute_result"
-    }
-   ],
-   "source": [
-    "predict_step(['D:\\\\Validation\\\\Class 2\\\\i17.jpg'])"
-   ]
-  }
- ],
- "metadata": {
-  "kernelspec": {
-   "display_name": "Python 3",
-   "language": "python",
-   "name": "python3"
-  },
-  "language_info": {
-   "codemirror_mode": {
-    "name": "ipython",
-    "version": 3
-   },
-   "file_extension": ".py",
-   "mimetype": "text/x-python",
-   "name": "python",
-   "nbconvert_exporter": "python",
-   "pygments_lexer": "ipython3",
-   "version": "3.11.5"
-  },
-  "orig_nbformat": 4
- },
- "nbformat": 4,
- "nbformat_minor": 2
-}
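For reference, the deleted notebook consolidates to the standalone script below. Every line comes straight from the notebook cells in the diff; the only additions are comments and a hypothetical `__main__` entry point (not in the original) so the script can be run from the command line.

```python
# Consolidated from the deleted app.ipynb: ViT-GPT2 image captioning.
from PIL import Image
import torch
from transformers import VisionEncoderDecoderModel, ViTImageProcessor, AutoTokenizer

model = VisionEncoderDecoderModel.from_pretrained("nlpconnect/vit-gpt2-image-captioning")
feature_extractor = ViTImageProcessor.from_pretrained("nlpconnect/vit-gpt2-image-captioning")
tokenizer = AutoTokenizer.from_pretrained("nlpconnect/vit-gpt2-image-captioning")

device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
model.to(device)

# Short captions via beam search, as in the notebook.
gen_kwargs = {"max_length": 16, "num_beams": 4}

def predict_step(image_paths):
    # Load each image and normalize to RGB (the ViT processor expects 3 channels).
    images = []
    for image_path in image_paths:
        i_image = Image.open(image_path)
        if i_image.mode != "RGB":
            i_image = i_image.convert(mode="RGB")
        images.append(i_image)

    # Preprocess into a batched pixel tensor on the model's device.
    pixel_values = feature_extractor(images=images, return_tensors="pt").pixel_values
    pixel_values = pixel_values.to(device)

    # Beam-search decode one caption per image, then strip special tokens.
    output_ids = model.generate(pixel_values, **gen_kwargs)
    preds = tokenizer.batch_decode(output_ids, skip_special_tokens=True)
    return [pred.strip() for pred in preds]

if __name__ == "__main__":
    import sys
    # Hypothetical CLI wrapper (not in the notebook): caption images given as arguments.
    print(predict_step(sys.argv[1:]))
```

The two stderr messages captured in the notebook outputs are both benign here: the tqdm `IProgress` warning is cosmetic and goes away after updating jupyter and ipywidgets, and the `attention_mask` warning from `generate()` can be ignored because, as the warning itself states, `pad_token_id`, `bos_token_id`, and `eos_token_id` are all 50256 for this GPT-2 decoder and the input is not padded.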
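If only the caption is needed, the same checkpoint is also exposed through the high-level `pipeline` API. A minimal sketch, assuming a transformers release recent enough to include the "image-to-text" task; the image path is just the example from the notebook:

```python
from transformers import pipeline

# One-liner equivalent of the notebook's load-and-generate flow.
captioner = pipeline("image-to-text", model="nlpconnect/vit-gpt2-image-captioning")

# Expected shape of the result: [{'generated_text': 'a clock on a dashboard of a car'}]
print(captioner("D:\\Validation\\Class 2\\i17.jpg"))
```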