# Hugging Face Spaces page residue (status header read "Spaces: Runtime error");
# kept as a comment so the file parses as Python.
import streamlit as st

# Leftover Streamlit hello-world demo, kept commented out.
# x = st.slider("Select a value")
# st.write(x, "squared is", x * x)

# --- Section 1: onboarding UI --------------------------------------------
st.title('Welcome to the Labelbox custom classifier training application!')
st.header('In this module you will be able to add data to your instance if you dont already have it, and then use bulk classification to train a custom image classification model')
st.subheader("If you don't have data in your org, enter your API Click the button below! Otherwise, Skip to section 2")
# Walkthrough video: how to create a Labelbox API key.
st.video("https://storage.googleapis.com/app-videos/How%20to%20Make%20an%20API%20Key%20on%20labelbox.com.mp4")
st.write("this video show you how to create an API key in Labelbox")

# NOTE(review): the coding cookie below is inert (it only has effect on the
# first or second line of a file) — residue from a Colab export.
# -*- coding: utf-8 -*-
"""
Original file is located at
https://colab.research.google.com/drive/1nOSff67KXhNgX_XSfnv3xnddobRoaK0d
"""
# The key entered here is read by both the upload helper (freedatatolb) and
# the training section at the bottom of the file.
api_key = st.text_input("Enter your api key:", type="password")
| import labelbox | |
| import labelpandas as lp | |
| import os | |
| import pandas as pd | |
| from tensorflow.python.lib.io import file_io | |
| import io | |
| from pandas import read_csv | |
| # read csv file from google cloud storage | |
| def read_data(gcs_path): | |
| file_stream = file_io.FileIO(gcs_path, mode='r') | |
| csv_data = read_csv(io.StringIO(file_stream.read())) | |
| return csv_data | |
def freedatatolb(amount_of_data):
    """Upload a sample of the public fashion-styles dataset to Labelbox.

    Downloads the CSV, keeps only the columns labelpandas needs, renames them
    into labelpandas' ``field`` / ``metadata///kind///name`` convention,
    creates a fresh dataset, and uploads the first ``amount_of_data`` rows.

    Args:
        amount_of_data: number of CSV rows to upload as data rows.

    Returns:
        The results object from ``client.create_data_rows_from_table``.

    Note: relies on the module-level ``api_key`` captured from the Streamlit
    text input above.
    """
    client = lp.Client(api_key)
    gcs_path = 'https://storage.googleapis.com/solution_accelerator_datasets/images_styles.csv'
    df = pd.read_csv(gcs_path)
    # Drop product-metadata columns we do not upload.
    df = df.drop(['id', 'season', 'usage', 'year', "gender", "masterCategory",
                  "subCategory", "articleType", "baseColour"], axis=1)
    # Which CSV column feeds which labelpandas field.
    fields = {"row_data": ["link"],                  # URL to asset (single)
              "global_key": ['filename'],            # globally unique key (single)
              "external_id": ["productDisplayName"], # external ID (single)
              "metadata_string": [],                 # string metadata (multiple)
              "metadata_number": [],                 # number metadata (multiple)
              "metadata_datetime": []                # datetime metadata (multiple, ISO 8601)
              }
    columns = {}
    # Iterate items() (not keys() + re-index) and split the field name once.
    for field, names in fields.items():
        for name in names:
            if field.startswith('metadata'):
                prefix, kind = field.split('_', 1)
                columns[name] = f"{prefix}///{kind}///{name}"
            else:
                columns[name] = field
    new_df = df.rename(columns=columns)
    testdf = new_df.head(amount_of_data)
    # Name the dataset after the CSV file; .uid is the dataset ID.
    dataset_id = client.lb_client.create_dataset(name=str(gcs_path.split('/')[-1])).uid
    results = client.create_data_rows_from_table(
        table=testdf,
        dataset_id=dataset_id,
        skip_duplicates=True,  # skip rows whose global key is already in use
        verbose=True,          # print progress information
    )
    return results
# --- Upload trigger -------------------------------------------------------
# Slider range is 500-1000 rows; the upload only runs on button click.
data_amount = st.slider("choose amout of data to add to labelbox", 500, 1000)
if st.button("Add data to your Labelbox"):
    st.write(f"adding {data_amount} datarows to Labelbox instance")
    bing = freedatatolb(data_amount)  # NOTE(review): result is never used
# --- Section 2: project setup walkthrough videos --------------------------
st.title("SECTION 2")
st.header("Create project and bulk classify images")
st.video("https://storage.googleapis.com/app-videos/Setting%20up%20Platform%20for%20Training%20a%20Model.mp4")
st.write("this video will help you set up a project for storing bulk classifications")
st.video("https://storage.googleapis.com/app-videos/Bulk%20Classification%20and%20Training%20Our%20Model.mp4")
st.write("this video teaches how to bulk classify the images and set up our model for training")

# --- Section 3: classifier training & inference ---------------------------
st.title("SECTION 3")
st.header("Auto Image classifier training and inference: Imagnet Weights")
# NOTE(review): inert coding cookie — residue from a second Colab export.
# -*- coding: utf-8 -*-
"""
Original file is located at
https://colab.research.google.com/drive/1CSyAE9DhwGTl7bLaSoo7QSyMuoEqJpCj
"""
def train_and_inference(api_key, ontology_id, model_run_id):
    """Fine-tune a MobileNetV2 image classifier on a Labelbox model run's
    labels, then upload the model's predictions back to that model run.

    Pipeline: export labels -> download images into
    ``dataset/<split>/<class>`` folders -> train a frozen-backbone
    MobileNetV2 with a new softmax head -> predict every exported data row
    -> import predictions into the model run.

    Args:
        api_key: Labelbox API key.
        ontology_id: ontology ID (Settings tab of the model run).
        model_run_id: model run ID (settings gear icon of the Model Run).

    Returns:
        A success message string if the prediction import reports no
        errors, otherwise the list of import errors.
    """
    # st.write('thisisstarting')
    # NOTE(review): the next three self-assignments are no-ops kept as-is.
    api_key = api_key  # insert Labelbox API key
    ontology_id = ontology_id  # get the ontology ID from the Settings tab at the top left of your model run
    model_run_id = model_run_id  # get the model run ID from the settings gear icon on the right side of your Model Run
    # Imports are done inside the function (with debug st.write markers left
    # commented out); several are duplicated further down.
    # st.write('1')
    import pydantic
    # st.write(pydantic.__version__)
    import numpy as np
    # st.write('2')
    import tensorflow as tf
    # st.write('3')
    from tensorflow.keras import layers
    # st.write('4')
    from tensorflow.keras.models import Sequential
    # st.write('5')
    from tensorflow.keras.preprocessing.image import ImageDataGenerator
    # st.write('6')
    import os
    # st.write('7')
    import labelbox
    # st.write('zat')
    from labelbox import Client
    # st.write('8')
    # st.write('9')
    import numpy as np
    import tensorflow as tf
    from tensorflow.keras import layers
    from tensorflow.keras.models import Sequential
    from tensorflow.keras.preprocessing.image import ImageDataGenerator
    import os
    from labelbox.schema.ontology import OntologyBuilder, Tool, Classification, Option
    from labelbox import Client, LabelingFrontend, LabelImport, MALPredictionImport
    from labelbox.data.annotation_types import (
        Label, ImageData, ObjectAnnotation, MaskData,
        Rectangle, Point, Line, Mask, Polygon,
        Radio, Checklist, Text,
        ClassificationAnnotation, ClassificationAnswer
    )
    from labelbox import MediaType
    from labelbox.data.serialization import NDJsonConverter
    import pandas as pd
    import shutil
    import labelbox.data
    import scipy
    import json
    import uuid
    import time
    import requests
    import pandas as pd
    import shutil
    import json
    import uuid
    import time
    import requests
    # st.write('imports')
    """Connect to labelbox client
    Define Model Variables
    """
    client = Client(api_key)
    EPOCHS = 10  # epochs for training the new classification head
    """#Setup Training
    Export Classifications from Model Run
    """
    model_run = client.get_model_run(model_run_id)
    client.enable_experimental = True
    # Export every labeled data row (including its data split) for this run.
    data_json = model_run.export_labels(download=True)
    print(data_json)
    """Separate datarows into folders."""
    import requests
    import os
    from urllib.parse import unquote

    def download_and_save_image(url, destination_folder, filename):
        # Download `url` to destination_folder/filename. Any failure is
        # reported to the Streamlit UI rather than raised.
        try:
            # Decode the URL
            url = unquote(url)
            # Ensure destination directory exists
            if not os.path.exists(destination_folder):
                os.makedirs(destination_folder)
            # Start the download process
            response = requests.get(url, stream=True)
            # Check if the request was successful
            if response.status_code == 200:
                file_path = os.path.join(destination_folder, filename)
                with open(file_path, 'wb') as file:
                    for chunk in response.iter_content(8192):
                        file.write(chunk)
                # st.write(f"Image downloaded and saved: {file_path}")
            # else:
            #     st.write(f"Failed to download the image. Status code: {response.status_code}")
        except Exception as e:
            st.write(f"An error occurred: {e}")

    BASE_DIR = 'dataset'
    # labeldict maps the classification answer's lowercase `value` to its
    # display `title`; reused later when building prediction annotations.
    labeldict = {}
    for entry in data_json:
        data_split = entry['Data Split']
        if data_split not in ['training', 'validation']:  # we are skipping 'test' for now
            continue
        image_url = f"{entry['Labeled Data']}"
        # NOTE(review): assumes exactly one classification per label —
        # only index [0] is read. Verify against the project ontology.
        label = entry['Label']['classifications'][0]['answer']['value']
        labeldict[label] = entry['Label']['classifications'][0]['answer']['title']
        # Folder name is the class label; Keras flow_from_directory uses the
        # folder structure to infer classes.
        destination_folder = os.path.join(BASE_DIR, data_split, label)
        filename = os.path.basename(image_url)
        # st.write(filename)
        # Strip any query string from the filename before saving.
        download_and_save_image(image_url, destination_folder, filename.split('?')[0])
    """#Train Model"""
    # st.write(labeldict)
    import tensorflow as tf
    from tensorflow.keras.preprocessing.image import ImageDataGenerator
    from tensorflow.keras.applications import MobileNetV2
    from tensorflow.keras.layers import Dense, GlobalAveragePooling2D
    from tensorflow.keras.models import Model
    from tensorflow.keras.optimizers import Adam

    TRAIN_DIR = 'dataset/training'
    VALIDATION_DIR = 'dataset/validation'
    IMG_HEIGHT, IMG_WIDTH = 224, 224  # default size for MobileNetV2
    BATCH_SIZE = 32
    # Augmentation on the training set only; validation is just rescaled.
    train_datagen = ImageDataGenerator(
        rescale=1./255,
        rotation_range=20,
        width_shift_range=0.2,
        height_shift_range=0.2,
        shear_range=0.2,
        zoom_range=0.2,
        horizontal_flip=True,
        fill_mode='nearest'
    )
    validation_datagen = ImageDataGenerator(rescale=1./255)
    train_ds = train_datagen.flow_from_directory(
        TRAIN_DIR,
        target_size=(IMG_HEIGHT, IMG_WIDTH),
        batch_size=BATCH_SIZE,
        class_mode='categorical'
    )
    validation_ds = validation_datagen.flow_from_directory(
        VALIDATION_DIR,
        target_size=(IMG_HEIGHT, IMG_WIDTH),
        batch_size=BATCH_SIZE,
        class_mode='categorical'
    )
    # Transfer learning: ImageNet-pretrained backbone, frozen below.
    base_model = MobileNetV2(input_shape=(IMG_HEIGHT, IMG_WIDTH, 3),
                             include_top=False,
                             weights='imagenet')
    # Freeze the base model
    for layer in base_model.layers:
        layer.trainable = False
    # Create custom classification head
    x = base_model.output
    x = GlobalAveragePooling2D()(x)
    x = Dense(1024, activation='relu')(x)
    predictions = Dense(train_ds.num_classes, activation='softmax')(x)
    model = Model(inputs=base_model.input, outputs=predictions)
    model.compile(optimizer=Adam(learning_rate=0.0001),
                  loss='categorical_crossentropy',
                  metrics=['accuracy'])
    st.write("training")
    history = model.fit(
        train_ds,
        validation_data=validation_ds,
        epochs=EPOCHS
    )
    """Run Inference on Model run Datarows"""
    st.write('running Inference')
    import numpy as np
    import requests
    from tensorflow.keras.preprocessing import image
    from PIL import Image
    from io import BytesIO

    # Fetch the image from the URL
    def load_image_from_url(img_url, target_size=(224, 224)):
        # Returns a (1, H, W, C) array suitable for model.predict.
        response = requests.get(img_url)
        img = Image.open(BytesIO(response.content))
        img = img.resize(target_size)
        img_array = image.img_to_array(img)
        return np.expand_dims(img_array, axis=0)

    def make_prediction(img_url):
        # Classify a single image URL; returns (predicted_label, confidence).
        # Image URL
        img_url = img_url
        # Load and preprocess the image
        img_data = load_image_from_url(img_url)
        img_data = img_data / 255.0  # Normalize the image data to [0,1]
        # Make predictions
        predictions = model.predict(img_data)
        predicted_class = np.argmax(predictions[0])
        # Retrieve the confidence score (probability) for the predicted class
        confidence = predictions[0][predicted_class]
        # Map the predicted class index to its corresponding label
        class_map = train_ds.class_indices
        inverse_map = {v: k for k, v in class_map.items()}
        predicted_label = inverse_map[predicted_class]
        return predicted_label, confidence

    from tensorflow.errors import InvalidArgumentError  # Add this import
    ontology = client.get_ontology(ontology_id)
    label_list = []
    # st.write(ontology)
    # Build one Radio prediction per exported data row (note: this export
    # includes ALL splits, so test rows get predictions too).
    for datarow in model_run.export_labels(download=True):
        try:
            label, confidence = make_prediction(datarow['Labeled Data'])
        except InvalidArgumentError as e:
            print(f"InvalidArgumentError: {e}. Skipping this data row.")
            continue  # Skip to the next datarow if an exception occurs
        # NOTE(review): assumes every predicted folder name (lowercased)
        # exists as a key in labeldict — a KeyError here would abort the run.
        my_checklist_answer = ClassificationAnswer(
            name=labeldict[label.lower()],
            confidence=confidence)
        checklist_prediction = ClassificationAnnotation(
            name=ontology.classifications()[0].instructions,
            value=Radio(
                answer=my_checklist_answer
            ))
        # print(datarow["DataRow ID"])
        label_prediction = Label(
            data=ImageData(uid=datarow['DataRow ID']),
            annotations=[checklist_prediction])
        label_list.append(label_prediction)
    # Upload all predictions in a single import job and block until done.
    prediction_import = model_run.add_predictions(
        name="prediction_upload_job" + str(uuid.uuid4()),
        predictions=label_list)
    prediction_import.wait_until_done()
    st.write(prediction_import.errors == [])
    if prediction_import.errors == []:
        return "Model Trained and inference ran successfully"
    else:
        return prediction_import.errors
# --- Training trigger -----------------------------------------------------
st.title("Enter Applicable IDs and keys below")
model_run_id = st.text_input("Enter your model run ID:")
ontology_id = st.text_input("Enter your ontology ID:")
if st.button("Train and run inference"):
    st.write('Starting Up...')
    # Check if the key is not empty
    # BUG FIX: the original tested `api_key + model_run_id + ontology_id`,
    # which is truthy when ANY one field is non-empty. The warning text
    # ("Please enter all keys.") shows the intent is that ALL three are set.
    if api_key and model_run_id and ontology_id:
        result = train_and_inference(api_key, ontology_id, model_run_id)
        st.write(result)
    else:
        st.warning("Please enter all keys.")