# Hugging Face Spaces page residue (status header read "Spaces: Runtime error");
# kept as a comment so the file parses as Python.
import streamlit as st

# Leftover Streamlit hello-world demo, kept commented out.
# x = st.slider("Select a value")
# st.write(x, "squared is", x * x)

# --- Section 1: onboarding UI --------------------------------------------
st.title('Welcome to the Labelbox custom classifier training application!')
st.header('In this module you will be able to add data to your instance if you dont already have it, and then use bulk classification to train a custom image classification model')
st.subheader("If you don't have data in your org, enter your API Click the button below! Otherwise, Skip to section 2")
# Walkthrough video: how to create a Labelbox API key.
st.video("https://storage.googleapis.com/app-videos/How%20to%20Make%20an%20API%20Key%20on%20labelbox.com.mp4")
st.write("this video show you how to create an API key in Labelbox")

# NOTE(review): the coding cookie below is inert (it only has effect on the
# first or second line of a file) — residue from a Colab export.
# -*- coding: utf-8 -*-
"""
Original file is located at
https://colab.research.google.com/drive/1nOSff67KXhNgX_XSfnv3xnddobRoaK0d
"""
# The key entered here is read by both the upload helper (freedatatolb) and
# the training section at the bottom of the file.
api_key = st.text_input("Enter your api key:", type="password")
| import labelbox | |
| import labelpandas as lp | |
| import os | |
| import pandas as pd | |
| from tensorflow.python.lib.io import file_io | |
| import io | |
| from pandas import read_csv | |
| # read csv file from google cloud storage | |
| def read_data(gcs_path): | |
| file_stream = file_io.FileIO(gcs_path, mode='r') | |
| csv_data = read_csv(io.StringIO(file_stream.read())) | |
| return csv_data | |
def freedatatolb(amount_of_data):
    """Upload a sample of the public fashion-styles dataset to Labelbox.

    Downloads the CSV, keeps only the columns labelpandas needs, renames them
    into labelpandas' ``field`` / ``metadata///kind///name`` convention,
    creates a fresh dataset, and uploads the first ``amount_of_data`` rows.

    Args:
        amount_of_data: number of CSV rows to upload as data rows.

    Returns:
        The results object from ``client.create_data_rows_from_table``.

    Note: relies on the module-level ``api_key`` captured from the Streamlit
    text input above.
    """
    client = lp.Client(api_key)
    gcs_path = 'https://storage.googleapis.com/solution_accelerator_datasets/images_styles.csv'
    df = pd.read_csv(gcs_path)
    # Drop product-metadata columns we do not upload.
    df = df.drop(['id', 'season', 'usage', 'year', "gender", "masterCategory",
                  "subCategory", "articleType", "baseColour"], axis=1)
    # Which CSV column feeds which labelpandas field.
    fields = {"row_data": ["link"],                  # URL to asset (single)
              "global_key": ['filename'],            # globally unique key (single)
              "external_id": ["productDisplayName"], # external ID (single)
              "metadata_string": [],                 # string metadata (multiple)
              "metadata_number": [],                 # number metadata (multiple)
              "metadata_datetime": []                # datetime metadata (multiple, ISO 8601)
              }
    columns = {}
    # Iterate items() (not keys() + re-index) and split the field name once.
    for field, names in fields.items():
        for name in names:
            if field.startswith('metadata'):
                prefix, kind = field.split('_', 1)
                columns[name] = f"{prefix}///{kind}///{name}"
            else:
                columns[name] = field
    new_df = df.rename(columns=columns)
    testdf = new_df.head(amount_of_data)
    # Name the dataset after the CSV file; .uid is the dataset ID.
    dataset_id = client.lb_client.create_dataset(name=str(gcs_path.split('/')[-1])).uid
    results = client.create_data_rows_from_table(
        table=testdf,
        dataset_id=dataset_id,
        skip_duplicates=True,  # skip rows whose global key is already in use
        verbose=True,          # print progress information
    )
    return results
# --- Upload trigger -------------------------------------------------------
# Slider range is 500-1000 rows; the upload only runs on button click.
data_amount = st.slider("choose amout of data to add to labelbox", 500, 1000)
if st.button("Add data to your Labelbox"):
    st.write(f"adding {data_amount} datarows to Labelbox instance")
    bing = freedatatolb(data_amount)  # NOTE(review): result is never used
# --- Section 2: project setup walkthrough videos --------------------------
st.title("SECTION 2")
st.header("Create project and bulk classify images")
st.video("https://storage.googleapis.com/app-videos/Setting%20up%20Platform%20for%20Training%20a%20Model.mp4")
st.write("this video will help you set up a project for storing bulk classifications")
st.video("https://storage.googleapis.com/app-videos/Bulk%20Classification%20and%20Training%20Our%20Model.mp4")
st.write("this video teaches how to bulk classify the images and set up our model for training")

# --- Section 3: classifier training & inference ---------------------------
st.title("SECTION 3")
st.header("Auto Image classifier training and inference: Imagnet Weights")
# NOTE(review): inert coding cookie — residue from a second Colab export.
# -*- coding: utf-8 -*-
"""
Original file is located at
https://colab.research.google.com/drive/1CSyAE9DhwGTl7bLaSoo7QSyMuoEqJpCj
"""
def train_and_inference(api_key, ontology_id, model_run_id):
    """Fine-tune a MobileNetV2 image classifier on a Labelbox model run's
    labels, then upload the model's predictions back to that model run.

    Pipeline: export labels -> download images into
    ``dataset/<split>/<class>`` folders -> train a frozen-backbone
    MobileNetV2 with a new softmax head -> predict every exported data row
    -> import predictions into the model run.

    Args:
        api_key: Labelbox API key.
        ontology_id: ontology ID (Settings tab of the model run).
        model_run_id: model run ID (settings gear icon of the Model Run).

    Returns:
        A success message string if the prediction import reports no
        errors, otherwise the list of import errors.
    """
    # st.write('thisisstarting')
    # NOTE(review): the next three self-assignments are no-ops kept as-is.
    api_key = api_key  # insert Labelbox API key
    ontology_id = ontology_id  # get the ontology ID from the Settings tab at the top left of your model run
    model_run_id = model_run_id  # get the model run ID from the settings gear icon on the right side of your Model Run
    # Imports are done inside the function (with debug st.write markers left
    # commented out); several are duplicated further down.
    # st.write('1')
    import pydantic
    # st.write(pydantic.__version__)
    import numpy as np
    # st.write('2')
    import tensorflow as tf
    # st.write('3')
    from tensorflow.keras import layers
    # st.write('4')
    from tensorflow.keras.models import Sequential
    # st.write('5')
    from tensorflow.keras.preprocessing.image import ImageDataGenerator
    # st.write('6')
    import os
    # st.write('7')
    import labelbox
    # st.write('zat')
    from labelbox import Client
    # st.write('8')
    # st.write('9')
    import numpy as np
    import tensorflow as tf
    from tensorflow.keras import layers
    from tensorflow.keras.models import Sequential
    from tensorflow.keras.preprocessing.image import ImageDataGenerator
    import os
    from labelbox.schema.ontology import OntologyBuilder, Tool, Classification, Option
    from labelbox import Client, LabelingFrontend, LabelImport, MALPredictionImport
    from labelbox.data.annotation_types import (
        Label, ImageData, ObjectAnnotation, MaskData,
        Rectangle, Point, Line, Mask, Polygon,
        Radio, Checklist, Text,
        ClassificationAnnotation, ClassificationAnswer
    )
    from labelbox import MediaType
    from labelbox.data.serialization import NDJsonConverter
    import pandas as pd
    import shutil
    import labelbox.data
    import scipy
    import json
    import uuid
    import time
    import requests
    import pandas as pd
    import shutil
    import json
    import uuid
    import time
    import requests
    # st.write('imports')
    """Connect to labelbox client
    Define Model Variables
    """
    client = Client(api_key)
    EPOCHS = 10  # epochs for training the new classification head
    """#Setup Training
    Export Classifications from Model Run
    """
    model_run = client.get_model_run(model_run_id)
    client.enable_experimental = True
    # Export every labeled data row (including its data split) for this run.
    data_json = model_run.export_labels(download=True)
    print(data_json)
    """Separate datarows into folders."""
    import requests
    import os
    from urllib.parse import unquote

    def download_and_save_image(url, destination_folder, filename):
        # Download `url` to destination_folder/filename. Any failure is
        # reported to the Streamlit UI rather than raised.
        try:
            # Decode the URL
            url = unquote(url)
            # Ensure destination directory exists
            if not os.path.exists(destination_folder):
                os.makedirs(destination_folder)
            # Start the download process
            response = requests.get(url, stream=True)
            # Check if the request was successful
            if response.status_code == 200:
                file_path = os.path.join(destination_folder, filename)
                with open(file_path, 'wb') as file:
                    for chunk in response.iter_content(8192):
                        file.write(chunk)
                # st.write(f"Image downloaded and saved: {file_path}")
            # else:
            #     st.write(f"Failed to download the image. Status code: {response.status_code}")
        except Exception as e:
            st.write(f"An error occurred: {e}")

    BASE_DIR = 'dataset'
    # labeldict maps the classification answer's lowercase `value` to its
    # display `title`; reused later when building prediction annotations.
    labeldict = {}
    for entry in data_json:
        data_split = entry['Data Split']
        if data_split not in ['training', 'validation']:  # we are skipping 'test' for now
            continue
        image_url = f"{entry['Labeled Data']}"
        # NOTE(review): assumes exactly one classification per label —
        # only index [0] is read. Verify against the project ontology.
        label = entry['Label']['classifications'][0]['answer']['value']
        labeldict[label] = entry['Label']['classifications'][0]['answer']['title']
        # Folder name is the class label; Keras flow_from_directory uses the
        # folder structure to infer classes.
        destination_folder = os.path.join(BASE_DIR, data_split, label)
        filename = os.path.basename(image_url)
        # st.write(filename)
        # Strip any query string from the filename before saving.
        download_and_save_image(image_url, destination_folder, filename.split('?')[0])
    """#Train Model"""
    # st.write(labeldict)
    import tensorflow as tf
    from tensorflow.keras.preprocessing.image import ImageDataGenerator
    from tensorflow.keras.applications import MobileNetV2
    from tensorflow.keras.layers import Dense, GlobalAveragePooling2D
    from tensorflow.keras.models import Model
    from tensorflow.keras.optimizers import Adam

    TRAIN_DIR = 'dataset/training'
    VALIDATION_DIR = 'dataset/validation'
    IMG_HEIGHT, IMG_WIDTH = 224, 224  # default size for MobileNetV2
    BATCH_SIZE = 32
    # Augmentation on the training set only; validation is just rescaled.
    train_datagen = ImageDataGenerator(
        rescale=1./255,
        rotation_range=20,
        width_shift_range=0.2,
        height_shift_range=0.2,
        shear_range=0.2,
        zoom_range=0.2,
        horizontal_flip=True,
        fill_mode='nearest'
    )
    validation_datagen = ImageDataGenerator(rescale=1./255)
    train_ds = train_datagen.flow_from_directory(
        TRAIN_DIR,
        target_size=(IMG_HEIGHT, IMG_WIDTH),
        batch_size=BATCH_SIZE,
        class_mode='categorical'
    )
    validation_ds = validation_datagen.flow_from_directory(
        VALIDATION_DIR,
        target_size=(IMG_HEIGHT, IMG_WIDTH),
        batch_size=BATCH_SIZE,
        class_mode='categorical'
    )
    # Transfer learning: ImageNet-pretrained backbone, frozen below.
    base_model = MobileNetV2(input_shape=(IMG_HEIGHT, IMG_WIDTH, 3),
                             include_top=False,
                             weights='imagenet')
    # Freeze the base model
    for layer in base_model.layers:
        layer.trainable = False
    # Create custom classification head
    x = base_model.output
    x = GlobalAveragePooling2D()(x)
    x = Dense(1024, activation='relu')(x)
    predictions = Dense(train_ds.num_classes, activation='softmax')(x)
    model = Model(inputs=base_model.input, outputs=predictions)
    model.compile(optimizer=Adam(learning_rate=0.0001),
                  loss='categorical_crossentropy',
                  metrics=['accuracy'])
    st.write("training")
    history = model.fit(
        train_ds,
        validation_data=validation_ds,
        epochs=EPOCHS
    )
    """Run Inference on Model run Datarows"""
    st.write('running Inference')
    import numpy as np
    import requests
    from tensorflow.keras.preprocessing import image
    from PIL import Image
    from io import BytesIO

    # Fetch the image from the URL
    def load_image_from_url(img_url, target_size=(224, 224)):
        # Returns a (1, H, W, C) array suitable for model.predict.
        response = requests.get(img_url)
        img = Image.open(BytesIO(response.content))
        img = img.resize(target_size)
        img_array = image.img_to_array(img)
        return np.expand_dims(img_array, axis=0)

    def make_prediction(img_url):
        # Classify a single image URL; returns (predicted_label, confidence).
        # Image URL
        img_url = img_url
        # Load and preprocess the image
        img_data = load_image_from_url(img_url)
        img_data = img_data / 255.0  # Normalize the image data to [0,1]
        # Make predictions
        predictions = model.predict(img_data)
        predicted_class = np.argmax(predictions[0])
        # Retrieve the confidence score (probability) for the predicted class
        confidence = predictions[0][predicted_class]
        # Map the predicted class index to its corresponding label
        class_map = train_ds.class_indices
        inverse_map = {v: k for k, v in class_map.items()}
        predicted_label = inverse_map[predicted_class]
        return predicted_label, confidence

    from tensorflow.errors import InvalidArgumentError  # Add this import
    ontology = client.get_ontology(ontology_id)
    label_list = []
    # st.write(ontology)
    # Build one Radio prediction per exported data row (note: this export
    # includes ALL splits, so test rows get predictions too).
    for datarow in model_run.export_labels(download=True):
        try:
            label, confidence = make_prediction(datarow['Labeled Data'])
        except InvalidArgumentError as e:
            print(f"InvalidArgumentError: {e}. Skipping this data row.")
            continue  # Skip to the next datarow if an exception occurs
        # NOTE(review): assumes every predicted folder name (lowercased)
        # exists as a key in labeldict — a KeyError here would abort the run.
        my_checklist_answer = ClassificationAnswer(
            name=labeldict[label.lower()],
            confidence=confidence)
        checklist_prediction = ClassificationAnnotation(
            name=ontology.classifications()[0].instructions,
            value=Radio(
                answer=my_checklist_answer
            ))
        # print(datarow["DataRow ID"])
        label_prediction = Label(
            data=ImageData(uid=datarow['DataRow ID']),
            annotations=[checklist_prediction])
        label_list.append(label_prediction)
    # Upload all predictions in a single import job and block until done.
    prediction_import = model_run.add_predictions(
        name="prediction_upload_job" + str(uuid.uuid4()),
        predictions=label_list)
    prediction_import.wait_until_done()
    st.write(prediction_import.errors == [])
    if prediction_import.errors == []:
        return "Model Trained and inference ran successfully"
    else:
        return prediction_import.errors
# --- Training trigger -----------------------------------------------------
st.title("Enter Applicable IDs and keys below")
model_run_id = st.text_input("Enter your model run ID:")
ontology_id = st.text_input("Enter your ontology ID:")
if st.button("Train and run inference"):
    st.write('Starting Up...')
    # Check if the key is not empty
    # BUG FIX: the original tested `api_key + model_run_id + ontology_id`,
    # which is truthy when ANY one field is non-empty. The warning text
    # ("Please enter all keys.") shows the intent is that ALL three are set.
    if api_key and model_run_id and ontology_id:
        result = train_and_inference(api_key, ontology_id, model_run_id)
        st.write(result)
    else:
        st.warning("Please enter all keys.")