import io
import math
import os
from functools import partial

import matplotlib.pyplot as plt
import numpy as np
import streamlit as st
import torch
import torch.nn as nn
from PIL import Image
from streamlit_cropper import st_cropper
from torchvision import transforms
from vit_pytorch import ViT

# Allow more than one OpenMP runtime to be loaded in the same process.
os.environ['KMP_DUPLICATE_LIB_OK'] = 'True'


def load_model():
    """Build the fine-tuned ``vit_pytorch`` ViT and load ``model.pth`` on CPU.

    Returns:
        The ``ViT`` instance with the checkpoint's state dict applied.
    """
    model = ViT(
        image_size=224,
        patch_size=16,
        num_classes=4,
        dim=768,
        depth=12,
        heads=12,
        mlp_dim=3072,
        dropout=0.1,
    )
    model.load_state_dict(
        torch.load(r"model.pth", map_location=torch.device('cpu')))
    return model


# NOTE(review): this runs on every Streamlit rerun; consider caching.
model = load_model()


def trunc_normal_(tensor, mean=0., std=1., a=-2., b=2.):
    """Fill *tensor* in place from a normal distribution truncated to [a, b]."""
    return _no_grad_trunc_normal_(tensor, mean, std, a, b)


def _no_grad_trunc_normal_(tensor, mean, std, a, b):
    """In-place truncated-normal initialisation (inverse-CDF method).

    The previous body only defined ``norm_cdf`` and returned ``None``, so
    every ``trunc_normal_`` call silently left the tensor untouched.

    Args:
        tensor: Tensor to fill in place.
        mean: Mean of the underlying normal distribution.
        std: Standard deviation of the underlying normal distribution.
        a: Lower truncation bound.
        b: Upper truncation bound.

    Returns:
        The same ``tensor`` object, after being filled.
    """
    def norm_cdf(x):
        # Standard normal cumulative distribution function.
        return (1. + math.erf(x / math.sqrt(2.))) / 2.

    with torch.no_grad():
        lower = norm_cdf((a - mean) / std)
        upper = norm_cdf((b - mean) / std)
        # Sample uniformly in the CDF range [lower, upper], rescaled to
        # [2*lower - 1, 2*upper - 1] so that erfinv maps it to a truncated
        # standard normal.
        tensor.uniform_(2 * lower - 1, 2 * upper - 1)
        tensor.erfinv_()
        tensor.mul_(std * math.sqrt(2.))
        tensor.add_(mean)
        # Guard against values landing just outside the bounds numerically.
        tensor.clamp_(min=a, max=b)
        return tensor


def drop_path(x, drop_prob: float = 0., training: bool = False):
    """Randomly zero whole samples of *x* and rescale the survivors.

    Returns *x* unchanged when ``drop_prob`` is ``None``/0 or when not
    training. ``DropPath`` defaults ``drop_prob`` to ``None``, which used to
    crash on ``1 - None`` in training mode.
    """
    if drop_prob is None or drop_prob == 0. or not training:
        return x
    keep_prob = 1 - drop_prob
    # One random draw per sample, broadcast over all remaining dimensions.
    shape = (x.shape[0],) + (1,) * (x.ndim - 1)
    random_tensor = keep_prob + torch.rand(
        shape, dtype=x.dtype, device=x.device)
    random_tensor.floor_()  # binarize
    return x.div(keep_prob) * random_tensor


class DropPath(nn.Module):
    """Drop paths (Stochastic Depth) per sample (when applied in main path
    of residual blocks).
    """

    def __init__(self, drop_prob=None):
        super().__init__()
        self.drop_prob = drop_prob

    def forward(self, x):
        return drop_path(x, self.drop_prob, self.training)


class Mlp(nn.Module):
    """Two-layer feed-forward block: fc1 -> activation -> drop -> fc2 -> drop."""

    def __init__(self, in_features, hidden_features=None, out_features=None,
                 act_layer=nn.GELU, drop=0.):
        super().__init__()
        out_features = out_features or in_features
        hidden_features = hidden_features or in_features
        self.fc1 = nn.Linear(in_features, hidden_features)
        self.act = act_layer()
        self.fc2 = nn.Linear(hidden_features, out_features)
        self.drop = nn.Dropout(drop)

    def forward(self, x):
        x = self.fc1(x)
        x = self.act(x)
        x = self.drop(x)
        x = self.fc2(x)
        x = self.drop(x)
        return x


class Attention(nn.Module):
    """Multi-head self-attention that also returns the attention weights."""

    def __init__(self, dim, num_heads=8, qkv_bias=False, qk_scale=None,
                 attn_drop=0., proj_drop=0.):
        super().__init__()
        self.num_heads = num_heads
        head_dim = dim // num_heads
        self.scale = qk_scale or head_dim ** -0.5

        self.qkv = nn.Linear(dim, dim * 3, bias=qkv_bias)
        self.attn_drop = nn.Dropout(attn_drop)
        self.proj = nn.Linear(dim, dim)
        self.proj_drop = nn.Dropout(proj_drop)

    def forward(self, x):
        """Return ``(projected_output, attention_weights)`` for input *x*."""
        B, N, C = x.shape
        # (B, N, 3*C) -> (3, B, heads, N, C // heads)
        qkv = self.qkv(x).reshape(
            B, N, 3, self.num_heads, C // self.num_heads
        ).permute(2, 0, 3, 1, 4)
        q, k, v = qkv[0], qkv[1], qkv[2]

        attn = (q @ k.transpose(-2, -1)) * self.scale
        attn = attn.softmax(dim=-1)
        attn = self.attn_drop(attn)

        x = (attn @ v).transpose(1, 2).reshape(B, N, C)
        x = self.proj(x)
        x = self.proj_drop(x)
        return x, attn


class Block(nn.Module):
    """Pre-norm transformer block: attention and MLP, each with a residual."""

    def __init__(self, dim, num_heads, mlp_ratio=4., qkv_bias=False,
                 qk_scale=None, drop=0., attn_drop=0., drop_path=0.,
                 act_layer=nn.GELU, norm_layer=nn.LayerNorm):
        super().__init__()
        self.norm1 = norm_layer(dim)
        self.attn = Attention(
            dim, num_heads=num_heads, qkv_bias=qkv_bias, qk_scale=qk_scale,
            attn_drop=attn_drop, proj_drop=drop)
        self.drop_path = DropPath(
            drop_path) if drop_path > 0. else nn.Identity()
        self.norm2 = norm_layer(dim)
        mlp_hidden_dim = int(dim * mlp_ratio)
        self.mlp = Mlp(in_features=dim, hidden_features=mlp_hidden_dim,
                       act_layer=act_layer, drop=drop)

    def forward(self, x, return_attention=False):
        """Run the block; with ``return_attention`` return only the weights."""
        y, attn = self.attn(self.norm1(x))
        if return_attention:
            return attn
        x = x + self.drop_path(y)
        x = x + self.drop_path(self.mlp(self.norm2(x)))
        return x


class PatchEmbed(nn.Module):
    """ Image to Patch Embedding
    """

    def __init__(self, img_size=224, patch_size=16, in_chans=3, embed_dim=768):
        super().__init__()
        num_patches = (img_size // patch_size) * (img_size // patch_size)
        self.img_size = img_size
        self.patch_size = patch_size
        self.num_patches = num_patches

        # A strided convolution projects each patch to one embedding vector.
        self.proj = nn.Conv2d(in_chans, embed_dim,
                              kernel_size=patch_size, stride=patch_size)

    def forward(self, x):
        # (B, C, H, W) -> (B, embed_dim, H', W') -> (B, H'*W', embed_dim)
        return self.proj(x).flatten(2).transpose(1, 2)


class VisionTransformer(nn.Module):
    """ Vision Transformer """

    def __init__(self, img_size=(224,), patch_size=16, in_chans=3,
                 num_classes=2, embed_dim=768, depth=12, num_heads=12,
                 mlp_ratio=4., qkv_bias=False, qk_scale=None, drop_rate=0.,
                 attn_drop_rate=0., drop_path_rate=0.,
                 norm_layer=nn.LayerNorm, **kwargs):
        # ``img_size`` default is a tuple (was a mutable list); only its first
        # element is read, so lists passed by callers still work.
        super().__init__()
        self.num_features = self.embed_dim = embed_dim

        self.patch_embed = PatchEmbed(
            img_size=img_size[0], patch_size=patch_size, in_chans=in_chans,
            embed_dim=embed_dim)
        num_patches = self.patch_embed.num_patches

        self.cls_token = nn.Parameter(torch.zeros(1, 1, embed_dim))
        self.pos_embed = nn.Parameter(
            torch.zeros(1, num_patches + 1, embed_dim))
        self.pos_drop = nn.Dropout(p=drop_rate)

        # Drop-path rate grows linearly from 0 to drop_path_rate with depth.
        dpr = [x.item() for x in torch.linspace(0, drop_path_rate, depth)]
        self.blocks = nn.ModuleList([
            Block(
                dim=embed_dim, num_heads=num_heads, mlp_ratio=mlp_ratio,
                qkv_bias=qkv_bias, qk_scale=qk_scale, drop=drop_rate,
                attn_drop=attn_drop_rate, drop_path=dpr[i],
                norm_layer=norm_layer)
            for i in range(depth)])
        self.norm = norm_layer(embed_dim)

        # NOTE: ``forward`` returns the normalised CLS token; ``head`` is
        # created here but not applied in any method of this class.
        self.head = nn.Linear(
            embed_dim, num_classes) if num_classes > 0 else nn.Identity()

        trunc_normal_(self.pos_embed, std=.02)
        trunc_normal_(self.cls_token, std=.02)
        self.apply(self._init_weights)

    def _init_weights(self, m):
        """Initialise Linear and LayerNorm sub-modules."""
        if isinstance(m, nn.Linear):
            trunc_normal_(m.weight, std=.02)
            if m.bias is not None:
                nn.init.constant_(m.bias, 0)
        elif isinstance(m, nn.LayerNorm):
            nn.init.constant_(m.bias, 0)
            nn.init.constant_(m.weight, 1.0)

    def interpolate_pos_encoding(self, x, w, h):
        """Return positional embeddings resized to match the tokens in *x*.

        When the patch count matches the stored embedding and the input is
        square, the stored embedding is returned as-is; otherwise the patch
        part is bicubically interpolated to the new grid.
        """
        npatch = x.shape[1] - 1
        N = self.pos_embed.shape[1] - 1
        if npatch == N and w == h:
            return self.pos_embed
        class_pos_embed = self.pos_embed[:, 0]
        patch_pos_embed = self.pos_embed[:, 1:]
        dim = x.shape[-1]
        w0 = w // self.patch_embed.patch_size
        h0 = h // self.patch_embed.patch_size
        # Small offset so the interpolated size does not round down
        # (presumably to absorb floating point error in scale_factor).
        w0, h0 = w0 + 0.1, h0 + 0.1
        grid = int(math.sqrt(N))
        patch_pos_embed = nn.functional.interpolate(
            patch_pos_embed.reshape(1, grid, grid, dim).permute(0, 3, 1, 2),
            scale_factor=(w0 / math.sqrt(N), h0 / math.sqrt(N)),
            mode='bicubic',
        )
        assert int(w0) == patch_pos_embed.shape[-2] \
            and int(h0) == patch_pos_embed.shape[-1]
        patch_pos_embed = patch_pos_embed.permute(0, 2, 3, 1).view(1, -1, dim)
        return torch.cat(
            (class_pos_embed.unsqueeze(0), patch_pos_embed), dim=1)

    def prepare_tokens(self, x):
        """Patch-embed *x*, prepend the CLS token and add position encoding."""
        B, nc, w, h = x.shape
        x = self.patch_embed(x)

        cls_tokens = self.cls_token.expand(B, -1, -1)
        x = torch.cat((cls_tokens, x), dim=1)

        x = x + self.interpolate_pos_encoding(x, w, h)
        return self.pos_drop(x)

    def forward(self, x):
        """Return the normalised CLS token after all blocks."""
        x = self.prepare_tokens(x)
        for blk in self.blocks:
            x = blk(x)
        x = self.norm(x)
        return x[:, 0]

    def get_last_selfattention(self, x):
        """Return the attention weights of the final block."""
        x = self.prepare_tokens(x)
        for i, blk in enumerate(self.blocks):
            if i < len(self.blocks) - 1:
                x = blk(x)
            else:
                return blk(x, return_attention=True)

    def get_intermediate_layers(self, x, n=1):
        """Return the normalised outputs of the last *n* blocks."""
        x = self.prepare_tokens(x)
        output = []
        for i, blk in enumerate(self.blocks):
            x = blk(x)
            if len(self.blocks) - i <= n:
                output.append(self.norm(x))
        return output


class VitGenerator(object):
    """Build a ``VisionTransformer`` variant by name and place it on a device.

    Args:
        name_model: One of ``'vit_tiny'``, ``'vit_small'``, ``'vit_base'``.
        patch_size: Patch size forwarded to ``VisionTransformer``.
        device: Device the model is moved to.
        evaluate: If true, freeze parameters and switch to eval mode.
        random: If false, ``_loadPretrainedWeights`` is called.
        verbose: If true, print progress messages.
    """

    def __init__(self, name_model, patch_size, device, evaluate=True,
                 random=False, verbose=False):
        self.name_model = name_model
        self.patch_size = patch_size
        self.evaluate = evaluate
        self.device = device
        self.verbose = verbose
        self.model = self._getModel()
        self._initializeModel()
        if not random:
            self._loadPretrainedWeights()

    def _getModel(self):
        """Instantiate the architecture selected by ``self.name_model``.

        Raises:
            ValueError: If ``self.name_model`` is not a known variant.
        """
        if self.verbose:
            print(
                f"[INFO] Initializing {self.name_model} with patch size of {self.patch_size}")

        # (embed_dim, num_heads) per supported variant.
        variants = {
            'vit_tiny': (192, 3),
            'vit_small': (384, 6),
            'vit_base': (768, 12),
        }
        if self.name_model not in variants:
            # Raising a plain string is itself a TypeError in Python 3.
            raise ValueError(f"No model found with {self.name_model}")

        embed_dim, num_heads = variants[self.name_model]
        return VisionTransformer(
            patch_size=self.patch_size, embed_dim=embed_dim, depth=12,
            num_heads=num_heads, mlp_ratio=4, qkv_bias=True,
            norm_layer=partial(nn.LayerNorm, eps=1e-6))

    def _initializeModel(self):
        """Optionally freeze the model, then move it to ``self.device``."""
        if self.evaluate:
            for p in self.model.parameters():
                p.requires_grad = False

            self.model.eval()

        self.model.to(self.device)

    def _loadPretrainedWeights(self):
        """Load ``model.pth`` into a freshly built ``vit_pytorch`` ViT.

        NOTE(review): the checkpoint is loaded into a local ``ViT`` that is
        discarded; ``self.model`` is never updated here, so attention maps
        come from the initial ``VisionTransformer`` weights. Left unchanged
        because the correct mapping between the two architectures cannot be
        determined from this file - confirm the intended behaviour.
        """
        if self.verbose:
            print("[INFO] Loading weights")
        from vit_pytorch import ViT
        model = ViT(
            image_size=224,
            patch_size=16,
            num_classes=4,
            dim=768,
            depth=12,
            heads=12,
            mlp_dim=3072,
            dropout=0.1,
        )
        model.load_state_dict(
            torch.load(r"model.pth", map_location=torch.device('cpu')))
        print("Loading fine tuned model.")

    def get_last_selfattention(self, img):
        """Return the last block's attention for *img* (moved to the device)."""
        return self.model.get_last_selfattention(img.to(self.device))

    def __call__(self, x):
        return self.model(x)


device = torch.device("cpu")


def transform(img, img_size):
    """Resize a PIL image to *img_size* and convert it to a tensor."""
    img = transforms.Resize(img_size)(img)
    img = transforms.ToTensor()(img)
    return img


def visualize_predict(model, img, img_size, patch_size, device):
    """Compute the attention maps for *img* and render them in Streamlit."""
    img_pre = transform(img, img_size)
    attention = visualize_attention(model, img_pre, patch_size, device)
    plot_attention(img, attention)


def visualize_attention(model, img, patch_size, device):
    """Return per-head CLS attention maps upsampled to pixel resolution.

    Args:
        model: Object exposing ``get_last_selfattention(tensor)``.
        img: Image tensor indexed as (channels, dim1, dim2).
        patch_size: Patch size used to crop and to upsample the maps.
        device: Device the image is moved to before inference.

    Returns:
        A NumPy array with one attention map per head.
    """
    # Crop so both spatial dimensions are multiples of the patch size.
    w = img.shape[1] - img.shape[1] % patch_size
    h = img.shape[2] - img.shape[2] % patch_size
    img = img[:, :w, :h].unsqueeze(0)

    w_featmap = img.shape[-2] // patch_size
    h_featmap = img.shape[-1] // patch_size

    attentions = model.get_last_selfattention(img.to(device))

    nh = attentions.shape[1]

    # Keep only the CLS token's attention towards the patch tokens.
    attentions = attentions[0, :, 0, 1:].reshape(nh, -1)
    attentions = attentions.reshape(nh, w_featmap, h_featmap)
    attentions = nn.functional.interpolate(
        attentions.unsqueeze(0), scale_factor=patch_size, mode="nearest"
    )[0].cpu().numpy()

    return attentions


def plot_attention(img, attention):
    """Show the image, the mean attention map and each head's map.

    Figures are closed after rendering; previously every Streamlit rerun
    left ``n_heads + 1`` matplotlib figures open.
    """
    n_heads = attention.shape[0]

    # Display the original image
    st.image(img, caption="Original", use_column_width=False)

    # Display the summary of attention plots
    summary_attention = np.mean(attention, 0)
    fig_summary_attention, ax_summary_attention = plt.subplots()
    ax_summary_attention.imshow(summary_attention, cmap='inferno')
    st.write("Mean head")
    st.pyplot(fig_summary_attention)
    plt.close(fig_summary_attention)

    # Display individual attention head plots
    for i in range(n_heads):
        fig_attention, ax_attention = plt.subplots()
        ax_attention.imshow(attention[i], cmap='inferno')
        st.write(f"head: {i}")
        st.pyplot(fig_attention)
        plt.close(fig_attention)


def Classification(img_path):
    """Classify the image at *img_path* with the global ``model_resnet``.

    ``model_resnet`` is not defined in this function; it must exist as a
    module-level name before this is called.

    Args:
        img_path: Path (or file-like object) accepted by ``Image.open``.

    Returns:
        ``(predicted_class_name, confidence_score)`` where the confidence is
        a string such as ``"87.12%"``.
    """
    preprocess = transforms.Compose([
        transforms.Resize((224, 224)),
        transforms.ToTensor(),
        transforms.Normalize(mean=[0.485, 0.456, 0.406],
                             std=[0.229, 0.224, 0.225]),
    ])

    # Close the file promptly and force 3 channels so Normalize's 3-element
    # mean/std always match (grayscale/RGBA inputs would otherwise fail).
    with Image.open(img_path) as input_image:
        input_tensor = preprocess(input_image.convert("RGB")).unsqueeze(0)

    model_resnet.eval()
    with torch.no_grad():
        output = model_resnet(input_tensor)

    _, predicted_class = output.max(1)
    probabilities = nn.Softmax(dim=1)(output)
    class_names = ['Basal_Carcinoma', 'Melanoma',
                   'Not_EarCancer', 'Squamous_Carcinoma']
    predicted_class_name = class_names[predicted_class.item()]
    confidence_score = "{:.2f}".format(probabilities.max().item() * 100) + "%"
    print("Predicted Class:", predicted_class_name)
    print("Confidence Score:", confidence_score)
    return predicted_class_name, confidence_score


def form():
    """Render the sidebar questionnaire, one question per run.

    Progress is kept in ``st.session_state`` (``current_question``,
    ``no_count``, ``answered``). Once every question has been answered the
    form is no longer rendered.

    Previously completion was only reached by letting ``questions[5]`` raise
    ``IndexError`` into a bare ``except`` at the call site, and the intended
    completion branch contained ``del form``, which makes ``form`` a local
    name and would raise ``UnboundLocalError``.

    Returns:
        The number of questions answered "ไม่" so far.
    """
    container_form = st.empty()
    questions = [
        "1- 2 อาทิตย์ที่ผ่านได้ทํากิจกรรมที่ต้องอยู่ท่ามกลางแสงเเดดในปริมาณมากหรือไม่?",
        "ช่วงนี้มีอาการปวดที่บริเวณหูหรือไม่?",
        "ช่วงนี้รู้สึกว่าตนเองมีความสามารถในการรับเสียงลดลงหรือไม่?",
        "พบของเหลวใสหรือเมือกขุ่นๆไหลออกมาจากรูหูหรือไม่?",
        "มีสมาชิกในครอบครัวคนใดเคยเป็นโรคมะเร็งหูชั้นนอกหรือไม่?"
    ]

    if "current_question" not in st.session_state:
        st.session_state.current_question = 0
    if "no_count" not in st.session_state:
        st.session_state.no_count = 0
    if "answered" not in st.session_state:
        st.session_state.answered = False

    with container_form.container():
        st.sidebar.title("แบบวินิจฉัยโรค")

        current_question = st.session_state.current_question
        if current_question >= len(questions):
            # All questions answered: clear the placeholder and stop here.
            container_form.empty()
            return st.session_state.no_count

        st.sidebar.subheader(questions[current_question])
        st.sidebar.write(f"ข้อที่: {current_question+1}/{len(questions)}")
        answer = st.sidebar.radio("คำตอบ", ["ใช่", "ไม่"])
        button = st.sidebar.button(
            "ต่อไป", use_container_width=True, disabled=False, type="primary")
        print(current_question)

    if answer == "ไม่" and button and not st.session_state.answered:
        st.session_state.no_count += 1
        st.session_state.answered = True

    if button:
        # Advance; the question is rendered from the new index on the next run.
        st.session_state.current_question += 1
        st.session_state.answered = False

    return st.session_state.no_count


form_score = form()


def save_image(image_data):
    """Save an uploaded/captured image as ``images/captured_image.jpg``.

    Args:
        image_data: File-like object exposing ``read()``.

    Returns:
        The path of the written JPEG file.
    """
    temp_file = io.BytesIO(image_data.read())
    img = Image.open(temp_file)
    # JPEG cannot store alpha or palette modes; convert so save() succeeds.
    if img.mode not in ("RGB", "L"):
        img = img.convert("RGB")
    save_dir = 'images'
    os.makedirs(save_dir, exist_ok=True)
    image_path = os.path.join(save_dir, 'captured_image.jpg')
    img.save(image_path)
    return image_path


def cropper_and_bar():
    """Render the sidebar cropper controls and return their settings.

    Returns:
        ``(realtime_update, box_color, aspect_ratio, factor_reduce)``.
        ``realtime_update`` is always ``False`` and ``aspect_ratio`` is
        always ``None``; ``factor_reduce`` is ``abs(slider_value - 12)``.
    """
    st.set_option('deprecation.showfileUploaderEncoding', False)
    box_color = st.sidebar.color_picker(label="Box Color", value='#0000FF')
    realtime_update = False
    aspect_ratio = None
    factor_reduce = st.sidebar.select_slider(
        'ปรับความละเอียดภาพวินิฉัย', options=[str(i) for i in range(1, 11)])
    # Invert the slider: a higher selection yields a smaller factor.
    factor_reduce = abs(int(factor_reduce) - 12)
    st.sidebar.write(factor_reduce)
    return realtime_update, box_color, aspect_ratio, factor_reduce
st.title('ระบบวินิฉัยโรคมะเร็งหู') st.info('กรุณาถ่ายภาพหูอย่างชัดเจนและควรใช้กล้องคุณภาพสูง เพื่อประสิทธิภาพที่ดีที่สุด', icon="ℹ️") img_path = st.camera_input(label="From camera",label_visibility="hidden") realtime_update,box_color,aspect_ratio,factor_reduce = cropper_and_bar() img_upload = st.sidebar.file_uploader(label='Upload a file', type=['png', 'jpg']) if img_upload: img = Image.open(img_upload) if not realtime_update: st.write("กดสองครั้งเพื่อเลือก") cropped_img = st_cropper(img, realtime_update=realtime_update, box_color=box_color, aspect_ratio=aspect_ratio) st.write("Preview") _ = cropped_img.thumbnail((224,224)) st.image(cropped_img) def get_sample_images(folder): sample_images = [] for filename in os.listdir(folder): if filename.endswith(".jpg") or filename.endswith(".png"): sample_images.append(os.path.join(folder, filename)) return sample_images sample_folder = "sample_image" sample_images = get_sample_images(sample_folder) # Display the sample images in the sidebar selection = st.sidebar.selectbox("Select a sample", ["Browse files Mode"] + sample_images, format_func=lambda x: os.path.basename(x)) if selection and selection != "Browse files Mode": img_path= selection if img_path: img = Image.open(img_path) if not realtime_update: st.write("กดสองครั้งเพื่อเลือก") cropped_img = st_cropper(img, realtime_update=realtime_update, box_color=box_color, aspect_ratio=aspect_ratio) st.write("Preview") _ = cropped_img.thumbnail((224,224)) st.image(cropped_img) if img_path or img_upload is not None: image_path = (cropped_img) result = st.button("วินิฉัย",type="primary",use_container_width=True) if result: st.write('Calculating results...') name_model = 'vit_small' import torch.nn as nn import torchvision.models as models device = torch.device("cpu") model_resnet = models.resnet50(pretrained=False) num_classes = 4 model_resnet.fc = nn.Linear(2048, num_classes) model_resnet.load_state_dict(torch.load(r'resnet.pth',map_location=torch.device('cpu'))) name_model = 
'vit_small' patch_size = 1 save_dir = "./images" os.makedirs(save_dir, exist_ok=True) save_path = os.path.join(save_dir, "cropped_img.jpg") cropped_img.save(save_path) path = save_path img = Image.open(path) predicted, threshold = Classification(path) print(4*form_score) if predicted != 'Not_EarCancer': newthreshold = float(threshold.replace("%","")) - (4*form_score) else: newthreshold = float(threshold.replace("%","")) if predicted != 'Not_EarCancer': if newthreshold > 50: if newthreshold >70: st.markdown(f'