Spaces:

mnavaidd
/

git-caption-generation-for-chest-xray

Runtime error

App Files Files Community

navaid-appedology committed on Aug 28, 2023

Commit

836577c

1 Parent(s): 468dbac

add model files and app.py

Browse files

Files changed (9) hide show

app.py +60 -0
model/config.json +107 -0
model/generation_config.json +7 -0
model/preprocessor_config.json +28 -0
model/pytorch_model.bin +3 -0
model/special_tokens_map.json +7 -0
model/tokenizer.json +0 -0
model/tokenizer_config.json +18 -0
model/vocab.txt +0 -0

app.py ADDED Viewed

	@@ -0,0 +1,60 @@

+# -*- coding: utf-8 -*-
+"""deploy_GIT_with_Gradio.ipynb
+Automatically generated by Colaboratory.
+Original file is located at
+    https://colab.research.google.com/drive/1s3Aa-QBjUtT2sW6HuRHoVaWvpJAaQIn5
+"""
+!pip install -q gradio
+!pip install -q transformers
+import transformers
+from transformers import BlipProcessor, BlipForImageTextRetrieval,BlipForConditionalGeneration, AutoProcessor
+from transformers import AutoModelForCausalLM
+import torch
+import gradio as gr
+from PIL import Image
+# Load the processor and model from the local "model" directory (model/config.json declares a GitForCausalLM architecture).
+# Superseded alternative, kept for reference: processor = AutoProcessor.from_pretrained("Salesforce/blip-image-captioning-base")
+# Superseded alternative, kept for reference: model = BlipForConditionalGeneration.from_pretrained("Salesforce/blip-image-captioning-base")
+model_path = "model"
+processor = AutoProcessor.from_pretrained(model_path)
+model = AutoModelForCausalLM.from_pretrained(model_path)
+# def predict_text_classification(image):
+#     inputs = processor(images=image, return_tensors="pt").to(device)
+#     pixel_values = inputs.pixel_values
+#     generated_ids = model.generate(pixel_values=pixel_values, max_length=50)
+#     generated_caption = processor.batch_decode(generated_ids, skip_special_tokens=True)[0]
+#     return generated_caption
+# Define the prediction function
+def generate_caption(image):
+    # Process the image
+    image = Image.fromarray(image)
+    #inputs = tokenizer(image, return_tensors="pt")
+    inputs = processor(images=image, return_tensors="pt")#.to(device)
+    pixel_values = inputs.pixel_values
+    # Generate caption
+    generated_ids = model.generate(pixel_values=pixel_values, max_length=50)
+    generated_caption = processor.batch_decode(generated_ids, skip_special_tokens=True)[0]
+    return generated_caption
+# Define the Gradio interface
+interface = gr.Interface(
+    fn=generate_caption,
+    inputs=gr.Image(),
+    outputs=gr.Textbox(),
+    live=True,
+    #capture_session=True  # Required for handling PIL Image in the prediction function
+)
+# Launch the Gradio interface
+interface.launch(share=True)

model/config.json ADDED Viewed

	@@ -0,0 +1,107 @@

+{
+  "_commit_hash": "1f7fe8444292beb4a259e3a5b6eba440cd5999d4",
+  "_name_or_path": "microsoft/git-base",
+  "architectures": [
+    "GitForCausalLM"
+  ],
+  "attention_probs_dropout_prob": 0.1,
+  "bos_token_id": 101,
+  "classifier_dropout": null,
+  "eos_token_id": 102,
+  "hidden_act": "gelu",
+  "hidden_dropout_prob": 0.1,
+  "hidden_size": 768,
+  "initializer_range": 0.02,
+  "intermediate_size": 3072,
+  "layer_norm_eps": 1e-12,
+  "max_position_embeddings": 1024,
+  "model_type": "git",
+  "num_attention_heads": 12,
+  "num_hidden_layers": 6,
+  "num_image_with_embedding": null,
+  "pad_token_id": 0,
+  "position_embedding_type": "absolute",
+  "tie_word_embeddings": false,
+  "torch_dtype": "float32",
+  "transformers_version": null,
+  "use_cache": true,
+  "vision_config": {
+    "_name_or_path": "",
+    "add_cross_attention": false,
+    "architectures": null,
+    "attention_dropout": 0.0,
+    "bad_words_ids": null,
+    "begin_suppress_tokens": null,
+    "bos_token_id": null,
+    "chunk_size_feed_forward": 0,
+    "cross_attention_hidden_size": null,
+    "decoder_start_token_id": null,
+    "diversity_penalty": 0.0,
+    "do_sample": false,
+    "dropout": 0.0,
+    "early_stopping": false,
+    "encoder_no_repeat_ngram_size": 0,
+    "eos_token_id": null,
+    "exponential_decay_length_penalty": null,
+    "finetuning_task": null,
+    "forced_bos_token_id": null,
+    "forced_eos_token_id": null,
+    "hidden_act": "quick_gelu",
+    "hidden_size": 768,
+    "id2label": {
+      "0": "LABEL_0",
+      "1": "LABEL_1"
+    },
+    "image_size": 224,
+    "initializer_factor": 1.0,
+    "initializer_range": 0.02,
+    "intermediate_size": 3072,
+    "is_decoder": false,
+    "is_encoder_decoder": false,
+    "label2id": {
+      "LABEL_0": 0,
+      "LABEL_1": 1
+    },
+    "layer_norm_eps": 1e-05,
+    "length_penalty": 1.0,
+    "max_length": 20,
+    "min_length": 0,
+    "model_type": "git_vision_model",
+    "no_repeat_ngram_size": 0,
+    "num_attention_heads": 12,
+    "num_beam_groups": 1,
+    "num_beams": 1,
+    "num_channels": 3,
+    "num_hidden_layers": 12,
+    "num_return_sequences": 1,
+    "output_attentions": false,
+    "output_hidden_states": false,
+    "output_scores": false,
+    "pad_token_id": null,
+    "patch_size": 16,
+    "prefix": null,
+    "problem_type": null,
+    "projection_dim": 512,
+    "pruned_heads": {},
+    "remove_invalid_values": false,
+    "repetition_penalty": 1.0,
+    "return_dict": true,
+    "return_dict_in_generate": false,
+    "sep_token_id": null,
+    "suppress_tokens": null,
+    "task_specific_params": null,
+    "temperature": 1.0,
+    "tf_legacy_loss": false,
+    "tie_encoder_decoder": false,
+    "tie_word_embeddings": true,
+    "tokenizer_class": null,
+    "top_k": 50,
+    "top_p": 1.0,
+    "torch_dtype": null,
+    "torchscript": false,
+    "transformers_version": "4.30.2",
+    "typical_p": 1.0,
+    "use_bfloat16": false
+  },
+  "vocab_size": 30522
+}

model/generation_config.json ADDED Viewed

	@@ -0,0 +1,7 @@

+{
+  "_from_model_config": true,
+  "bos_token_id": 101,
+  "eos_token_id": 102,
+  "pad_token_id": 0,
+  "transformers_version": "4.30.2"
+}

model/preprocessor_config.json ADDED Viewed

	@@ -0,0 +1,28 @@

+{
+  "crop_size": {
+    "height": 224,
+    "width": 224
+  },
+  "do_center_crop": true,
+  "do_convert_rgb": true,
+  "do_normalize": true,
+  "do_rescale": true,
+  "do_resize": true,
+  "image_mean": [
+    0.48145466,
+    0.4578275,
+    0.40821073
+  ],
+  "image_processor_type": "CLIPImageProcessor",
+  "image_std": [
+    0.26862954,
+    0.26130258,
+    0.27577711
+  ],
+  "processor_class": "GitProcessor",
+  "resample": 3,
+  "rescale_factor": 0.00392156862745098,
+  "size": {
+    "shortest_edge": 224
+  }
+}

model/pytorch_model.bin ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:5cbe4226c595094a82da0b880bf2c89c9f35874453d293887f42e7c4d6ea1d30
+size 706594713

model/special_tokens_map.json ADDED Viewed

	@@ -0,0 +1,7 @@

+{
+  "cls_token": "[CLS]",
+  "mask_token": "[MASK]",
+  "pad_token": "[PAD]",
+  "sep_token": "[SEP]",
+  "unk_token": "[UNK]"
+}

model/tokenizer.json ADDED Viewed

The diff for this file is too large to render. See raw diff

model/tokenizer_config.json ADDED Viewed

	@@ -0,0 +1,18 @@

+{
+  "clean_up_tokenization_spaces": true,
+  "cls_token": "[CLS]",
+  "do_lower_case": true,
+  "mask_token": "[MASK]",
+  "model_input_names": [
+    "input_ids",
+    "attention_mask"
+  ],
+  "model_max_length": 512,
+  "pad_token": "[PAD]",
+  "processor_class": "GitProcessor",
+  "sep_token": "[SEP]",
+  "strip_accents": null,
+  "tokenize_chinese_chars": true,
+  "tokenizer_class": "BertTokenizer",
+  "unk_token": "[UNK]"
+}

model/vocab.txt ADDED Viewed

The diff for this file is too large to render. See raw diff