Spaces:

nicpopovic
/

ember

Sleeping

App Files Files Community

nicpopovic committed on Oct 9, 2024

Commit

ee5a50d

verified ·

1 Parent(s): 86336aa

Upload 13 files

Browse files

Files changed (13) hide show

app.py +560 -0
data/meta-llama/Llama-3.2-1B/STOKE_100/checkpoints/span_classifier/Rxi8b70XJA/checkpoint.pt +3 -0
data/meta-llama/Llama-3.2-1B/STOKE_100/checkpoints/span_classifier/Rxi8b70XJA/config.json +29 -0
data/meta-llama/Llama-3.2-1B/STOKE_100/checkpoints/span_classifier/Rxi8b70XJA/config_train.json +51 -0
data/meta-llama/Llama-3.2-1B/STOKE_100/checkpoints/token_classifier/dR8xQB4ODU/checkpoint.pt +3 -0
data/meta-llama/Llama-3.2-1B/STOKE_100/checkpoints/token_classifier/dR8xQB4ODU/config.json +166 -0
data/meta-llama/Llama-3.2-1B/STOKE_100/checkpoints/token_classifier/dR8xQB4ODU/config_train.json +51 -0
data/meta-llama/Llama-3.2-1B/STOKE_100/checkpoints/token_classifier/pbK46jjAVx/checkpoint.pt +3 -0
data/meta-llama/Llama-3.2-1B/STOKE_100/checkpoints/token_classifier/pbK46jjAVx/config.json +166 -0
data/meta-llama/Llama-3.2-1B/STOKE_100/checkpoints/token_classifier/pbK46jjAVx/config_train.json +51 -0
data/meta-llama/Llama-3.2-1B/STOKE_100/config.json +8 -0
data/meta-llama/Llama-3.2-1B/STOKE_100/stoke_config.json +10 -0
requirements.txt +8 -0

app.py ADDED Viewed

	@@ -0,0 +1,560 @@

+import gradio as gr
+from transformers import AutoModelForCausalLM, AutoTokenizer, STOKEStreamer
+from threading import Thread
+import json
+import torch
+import os
+import numpy as np
+import matplotlib.pyplot as plt
+from matplotlib.colors import to_hex
+import itertools
+import transformers
+# Only let transformers emit log records at error level.
+transformers.logging.set_verbosity_error()
+# Variable to define number of instances
+n_instances = 1
+# Device label interpolated into the model-info HTML; stays "CPU" when
+# torch reports no CUDA devices.
+gpu_name = "CPU"
+# With several CUDA devices the loop leaves the name of the last one.
+for i in range(torch.cuda.device_count()):
+   gpu_name = torch.cuda.get_device_properties(i).name
+# Reusing the original MLP class and other functions (unchanged) except those specific to Streamlit
+class MLP(torch.nn.Module):
+    """Two-layer perceptron (Linear -> ReLU -> Linear) used as a probe.
+
+    The sub-modules are named ``fc1`` and ``fc3`` and created in that
+    order; the names are part of the state-dict layout loaded from
+    ``checkpoint.pt``.
+    """
+    def __init__(self, input_dim, output_dim, hidden_dim=1024, layer_id=0, cuda=False):
+        super().__init__()
+        self.fc1 = torch.nn.Linear(input_dim, hidden_dim)
+        self.fc3 = torch.nn.Linear(hidden_dim, output_dim)
+        # Stored on the instance only; not read inside this class.
+        self.layer_id = layer_id
+        self.device = "cuda" if cuda else "cpu"
+        self.to(self.device)
+    def forward(self, x):
+        """Return ``(argmax labels, softmax probabilities)`` as detached CPU tensors.
+
+        Every dimension after the first is flattened before the first layer.
+        """
+        hidden = torch.relu(self.fc1(torch.flatten(x, start_dim=1)))
+        logits = self.fc3(hidden)
+        labels = torch.argmax(logits, dim=-1)
+        probabilities = torch.softmax(logits, dim=-1)
+        return labels.cpu().detach(), probabilities.cpu().detach()
+def map_value_to_color(value, colormap_name='tab20c'):
+    """Map ``value`` (clipped to [0, 1]) to a hex colour from a matplotlib colormap.
+
+    The literal suffix "88" is appended to the hex string; the result is
+    used as a CSS ``background-color`` value by the HTML builders.
+    """
+    clipped = np.clip(value, 0.0, 1.0)
+    rgba = plt.get_cmap(colormap_name)(clipped)
+    return to_hex(rgba) + "88"
+# Caching functions for model and classifier
+model_cache = {}
+def get_multiple_model_and_tokenizer(name, n_instances):
+    """Load ``n_instances`` independent ``(model, tokenizer)`` pairs for ``name``.
+
+    Each pair is loaded from scratch with the ``HF_TOKEN`` environment
+    variable as access token; the model is moved to CUDA when available.
+    """
+    def _load_pair():
+        # Tokenizer first, then model, as separate from_pretrained calls.
+        tokenizer = AutoTokenizer.from_pretrained(
+            name, token=os.getenv('HF_TOKEN'), pad_token_id=128001
+        )
+        lm = AutoModelForCausalLM.from_pretrained(
+            name,
+            token=os.getenv('HF_TOKEN'),
+            torch_dtype="bfloat16",
+            pad_token_id=128001,
+            device_map="auto",
+        )
+        if torch.cuda.is_available():
+            lm.cuda()
+        return lm, tokenizer
+    return [_load_pair() for _ in range(n_instances)]
+def get_classifiers_for_model(att_size, emb_size, device, config_paths):
+    """Build the span and token classifiers and load their checkpoints.
+
+    Args:
+        att_size: input dimension of the span classifier.
+        emb_size: input dimension of the token classifier.
+        device: device the classifiers are moved to and the checkpoints
+            are mapped onto.
+        config_paths: mapping with the keys "classifier_token" and
+            "classifier_span", each a directory holding ``config.json``
+            and ``checkpoint.pt``.
+
+    Returns:
+        ``(classifier_span, classifier_token, label_map)`` where
+        ``label_map`` is taken from the token classifier's config.
+    """
+    def _read_config(directory):
+        # Context manager so the handle is closed right after parsing.
+        with open(os.path.join(directory, "config.json"), "r") as handle:
+            return json.load(handle)
+    def _load_weights(classifier, directory):
+        state = torch.load(os.path.join(directory, "checkpoint.pt"), map_location=device, weights_only=True)
+        classifier.load_state_dict(state)
+    config = {
+        "classifier_token": _read_config(config_paths["classifier_token"]),
+        "classifier_span": _read_config(config_paths["classifier_span"]),
+    }
+    layer_id = config["classifier_token"]["layer"]
+    # The span classifier is binary (two output classes).
+    classifier_span = MLP(att_size, 2, hidden_dim=config["classifier_span"]["classifier_dim"]).to(device)
+    _load_weights(classifier_span, config_paths["classifier_span"])
+    classifier_token = MLP(emb_size, len(config["classifier_token"]["label_map"]), layer_id=layer_id, hidden_dim=config["classifier_token"]["classifier_dim"]).to(device)
+    _load_weights(classifier_token, config_paths["classifier_token"])
+    return classifier_span, classifier_token, config["classifier_token"]["label_map"]
+def find_datasets_and_model_ids(root_dir):
+    """Collect STOKE configurations found below ``root_dir``.
+
+    A directory counts as a dataset when it holds both ``config.json``
+    and ``stoke_config.json`` and the former has a truthy "model_id".
+
+    Returns:
+        ``{model_id: {dataset_name: parsed stoke_config.json}}`` where
+        ``dataset_name`` is the name of the directory holding the files.
+    """
+    datasets = {}
+    for root, _dirs, files in os.walk(root_dir):
+        if 'config.json' not in files or 'stoke_config.json' not in files:
+            continue
+        with open(os.path.join(root, 'config.json'), 'r') as f:
+            model_id = json.load(f).get('model_id')
+        if not model_id:
+            # Nothing can be registered without a model id, so the
+            # second file is not read at all.
+            continue
+        stoke_config_path = os.path.join(root, 'stoke_config.json')
+        with open(stoke_config_path, 'r') as f:
+            stoke_config_data = json.load(f)
+        dataset_name = os.path.basename(os.path.dirname(stoke_config_path))
+        datasets.setdefault(model_id, {})[dataset_name] = stoke_config_data
+    return datasets
+def filter_spans(spans_and_values):
+    """Keep the highest-valued span for every distinct end index.
+
+    Entries are ``(span, value)`` with ``span == (start, end)``. A span is
+    dropped when ``start == 0``, ``start > end`` or it is longer than 15.
+    On equal values the span seen first is kept.
+
+    Returns:
+        ``(spans, values)`` as two parallel lists, ordered by the first
+        appearance of each end index; two empty lists when nothing is left.
+    """
+    best_by_end = {}
+    for entry in spans_and_values:
+        candidate, score = entry[0], entry[1]
+        begin, stop = candidate
+        if begin == 0 or begin > stop or stop - begin > 15:
+            continue
+        incumbent = best_by_end.get(stop)
+        if incumbent is None or incumbent[1] < score:
+            best_by_end[stop] = (candidate, score)
+    winners = list(best_by_end.values())
+    return [pair[0] for pair in winners], [pair[1] for pair in winners]
+def remove_overlapping_spans(spans):
+    """Return a non-overlapping subset of ``spans``, preferring higher values.
+
+    Entries are ``((start, end), value)``; spans are half-open, so a span
+    starting exactly where another ends does not overlap it. Spans are
+    visited in order of their end index. A span that overlaps already
+    kept spans replaces them only when its value is strictly higher than
+    each of theirs; otherwise it is dropped.
+
+    Returns:
+        The kept entries, ordered by end index. The input is not modified.
+    """
+    kept = []
+    for candidate in sorted(spans, key=lambda x: x[0][1]):
+        start = candidate[0][0]
+        # Kept spans are disjoint and ordered by end, so everything that
+        # overlaps the candidate sits at the tail of the list.
+        first_clash = len(kept)
+        while first_clash > 0 and kept[first_clash - 1][0][1] > start:
+            first_clash -= 1
+        clashing = kept[first_clash:]
+        # With no clash this is vacuously true and the candidate is appended.
+        if all(existing[1] < candidate[1] for existing in clashing):
+            kept[first_clash:] = [candidate]
+    return kept
+def generate_html_no_overlap(tokenized_text, spans):
+    """Join the tokens into one string, wrapping every span in ``<b><u>…</u></b>``.
+
+    ``spans`` holds ``((start, end), value)`` entries that are consumed in
+    the order given; ``value`` is not used. Each closing tag is followed
+    by a single space.
+    """
+    pieces = []
+    cursor = 0
+    for bounds, _score in spans:
+        begin, stop = bounds
+        # Plain text between the previous span and this one.
+        pieces.append("".join(tokenized_text[cursor:begin]))
+        pieces.append("<b><u>" + "".join(tokenized_text[begin:stop]) + "</u></b> ")
+        cursor = stop
+    # Whatever follows the last span.
+    pieces.append("".join(tokenized_text[cursor:]))
+    return "".join(pieces)
+# Stylesheet that generate_text, gen_json and initial_output put in front
+# of the HTML they build.
+# NOTE(review): `var(data-color)` is not a valid custom-property reference
+# (CSS custom properties start with `--`), so the `.highlight::after` rule
+# presumably has no effect — confirm before relying on it.
+css = """
+    <style>
+    .prose {
+        line-height: 200%;
+    }
+    .highlight {
+        display: inline;
+    }
+    .highlight::after {
+        background-color: var(data-color);
+    }
+    .spanhighlight {
+        padding: 2px 5px;
+        border-radius: 5px;
+    }
+    .tooltip {
+    position: relative;
+    display: inline-block;
+    }
+    .generated-content {
+        margin-top: -1em;
+        height: 130px;
+    }
+.tooltip::after {
+    content: attr(data-tooltip-text); /* Set content from data-tooltip-text attribute */
+    display: none;
+    position: absolute;
+    background-color: #333;
+    color: #fff;
+    padding: 5px;
+    border-radius: 5px;
+    bottom: 100%; /* Position it above the element */
+    left: 50%;
+    transform: translateX(-50%);
+    width: auto;
+    min-width: 120px;
+    margin: 0 auto;
+    text-align: center;
+}
+.tooltip:hover::after {
+    display: block; /* Show the tooltip on hover */
+}
+.small-text {
+    padding: 2px 5px;
+    background-color: white;
+    border-radius: 5px;
+    font-size: xx-small;
+    margin-left: 0.5em;
+    vertical-align: 0.2em;
+    font-weight: bold;
+    color: grey!important;
+}
+    </style>"""
+def generate_html_spanwise(token_strings, tokenwise_preds, spans, tokenizer, new_tags):
+    """Build a token list in which labelled spans are wrapped in HTML markup.
+
+    Tokens are visited from last to first and collected in reverse; the
+    list is flipped once at the end. A span is rendered when it ends at
+    token ``i`` (``span[1] == i + 1``) and that token's prediction is
+    non-zero; the label and colour come from that last token.
+
+    Args:
+        token_strings: token strings, indexed like ``tokenwise_preds``.
+        tokenwise_preds: per-token label indices; 0 is treated as "no label".
+        spans: ``(start, end)`` index pairs, ``end`` exclusive.
+        tokenizer: only ``convert_tokens_to_string`` is called on it.
+        new_tags: label names indexed by prediction.
+
+    Returns:
+        Tokens and HTML fragments in reading order. Spaces inside the
+        opening tag are written as "Ġ" — presumably so the caller's
+        ``convert_tokens_to_string`` turns them back into spaces.
+    """
+    # spanwise annotated text
+    annotated = []
+    span_ends = -1  # start index of the span currently being skipped over
+    in_span = False
+    out_of_span_tokens = []  # unlabelled tokens seen since the last span, in reverse order
+    for i in reversed(range(len(tokenwise_preds))):
+        if in_span:
+            # Tokens of an already rendered span were emitted in one go below.
+            if i >= span_ends:
+                continue
+            else:
+                in_span = False
+        predicted_class = ""
+        style = ""
+        span = None
+        # If several spans end here, the last one in `spans` wins.
+        for s in spans:
+            if s[1] == i+1:
+                span = s
+        if tokenwise_preds[i] != 0 and span is not None:
+            predicted_class = f"highlight spanhighlight"
+            # Label index rescaled to [0, 1] over the non-zero labels.
+            style = f"background-color: {map_value_to_color((tokenwise_preds[i]-1)/(len(new_tags)-1))}"
+            # NOTE(review): this marker is appended before the pending
+            # out-of-span tokens, so after the final reversal it lands
+            # behind them rather than next to the span — confirm intent.
+            if tokenizer.convert_tokens_to_string([token_strings[i]]).startswith(" "):
+                annotated.append("Ġ")
+            span_opener = f"Ġ<span class='{predicted_class}' data-tooltip-text='{new_tags[tokenwise_preds[i]]}' style='{style}'>".replace(" ", "Ġ")
+            span_end = f"<span class='small-text'>{new_tags[tokenwise_preds[i]]}</span></span>"
+            annotated.extend(out_of_span_tokens)
+            out_of_span_tokens = []
+            span_ends = span[0]
+            in_span = True
+            # Reverse order: closing markup, span tokens back to front, opener.
+            annotated.append(span_end)
+            annotated.extend([token_strings[x] for x in reversed(range(span[0], span[1]))])
+            annotated.append(span_opener)
+        else:
+            out_of_span_tokens.append(token_strings[i])
+    annotated.extend(out_of_span_tokens)
+    return [x for x in reversed(annotated)]
+def gen_json(input_text, max_new_tokens):
+    """Stream the completion of ``input_text`` as text plus extracted entities.
+
+    Generation runs in a background thread and feeds a ``STOKEStreamer``.
+    For every streamer item that carries predictions, a dict is yielded:
+
+        {"text": <text so far>, "entites": [{"name": ..., "type": ...}, ...]}
+
+    The key "entites" is kept as spelled because consumers may depend on
+    it. Entities are the spans kept by ``filter_spans`` whose last token
+    is not labelled "O".
+
+    Args:
+        input_text: prompt to complete.
+        max_new_tokens: generation budget passed to ``model.generate``.
+    """
+    # Take a model instance from the shared rotation; no module-level
+    # `model`/`tok` exist in this file.
+    model, tok = next(model_round_robin)
+    streamer = STOKEStreamer(tok, classifier_token, classifier_span)
+    new_tags = label_map
+    inputs = tok([f"  {input_text}"], return_tensors="pt").to(model.device)
+    generation_kwargs = dict(
+        inputs, streamer=streamer, max_new_tokens=max_new_tokens,
+        repetition_penalty=1.2, do_sample=False
+    )
+    thread = Thread(target=model.generate, kwargs=generation_kwargs)
+    thread.start()
+    tags = []
+    spans = []
+    for new_text in streamer:
+        # Streamer items are indexed positionally: [1] holds the new
+        # predictions, [2] the token strings so far, [-1] the new spans.
+        if new_text[1] is None or new_text[2] == ['']:
+            continue
+        tags.extend(new_text[1])
+        spans.extend(new_text[-1])
+        # zip() limits the text to tokens that already have a prediction.
+        output_text = "".join(tk for tk, _ in zip(new_text[2], tags))
+        list_of_spans = [
+            {"name": tok.convert_tokens_to_string(new_text[2][x[0]:x[1]]).strip(), "type": new_tags[tags[x[1]-1]]}
+            for x in filter_spans(spans)[0]
+            if new_tags[tags[x[1]-1]] != "O"
+        ]
+        cleaned_text = output_text.replace("<|endoftext|>", "").replace("<|begin_of_text|>", "").strip()
+        yield {"text": cleaned_text, "entites": list_of_spans}
+    thread.join()
+    return
+# Gradio app function to generate text using the assigned model instance
+def generate_text(input_text, max_new_tokens=40):
+    """Stream an HTML rendering of the completion with labelled spans highlighted.
+
+    Generation runs in a background thread and feeds a ``STOKEStreamer``.
+    For every streamer item that carries predictions, the full HTML for
+    the text so far is yielded, prefixed with the shared stylesheet.
+
+    Args:
+        input_text: prompt; only its first 200 characters are used. An
+            empty prompt yields a short hint and ends.
+        max_new_tokens: generation budget passed to ``model.generate``.
+    """
+    if input_text == "":
+        yield "Please enter some text first."
+        return
+    # Select the next model instance in a round-robin manner
+    model, tok = next(model_round_robin)
+    # NOTE(review): this sets an attribute on the server-side component
+    # object; whether the browser ever sees the change depends on Gradio
+    # — confirm, or return a component update instead.
+    generate_button.visible = False
+    streamer = STOKEStreamer(tok, classifier_token, classifier_span)
+    new_tags = label_map
+    inputs = tok([f"  {input_text[:200]}"], return_tensors="pt").to(model.device)
+    generation_kwargs = dict(
+        inputs, streamer=streamer, max_new_tokens=max_new_tokens,
+        repetition_penalty=1.2, do_sample=False, temperature=None, top_p=None
+    )
+    thread = Thread(target=model.generate, kwargs=generation_kwargs)
+    thread.start()
+    # Display generated text as it becomes available
+    tags = []
+    spans = []
+    for new_text in streamer:
+        # Streamer items are indexed positionally: [1] holds the new
+        # predictions, [2] the token strings so far, [-1] the new spans.
+        if new_text[1] is None or new_text[2] == ['']:
+            continue
+        tags.extend(new_text[1])
+        spans.extend(new_text[-1])
+        # Spanwise Classification
+        annotated_tokens = generate_html_spanwise(new_text[2], tags, filter_spans(spans)[0], tok, new_tags)
+        generated_text_spanwise = tok.convert_tokens_to_string(annotated_tokens).replace("<|endoftext|>", "").replace("<|begin_of_text|>", "")
+        output = f"{css}<div class=\"generated-content\"><br>"
+        output += generated_text_spanwise.replace("\n", " ") + "\n<br>"
+        yield output + "</div>"
+    generate_button.visible = True
+    return
+# Load datasets and models for the Gradio app
+# The data directory is scanned once; everything below is derived from it.
+datasets = find_datasets_and_model_ids("data/")
+available_models = list(datasets.keys())
+available_datasets = {model: list(datasets[model].keys()) for model in available_models}
+available_configs = {model: {dataset: list(datasets[model][dataset].keys()) for dataset in available_datasets[model]} for model in available_models}
+def update_datasets(model_name):
+    """Return the dataset names discovered for ``model_name``."""
+    return available_datasets[model_name]
+def update_configs(model_name, dataset_name):
+    """Return the configuration ids available for a model/dataset pair."""
+    return available_configs[model_name][dataset_name]
+# Set the model ID and data configurations
+model_id = "meta-llama/Llama-3.2-1B"
+data_id = "STOKE_100"
+config_id = "default"
+# Load n_instances separate instances of the model and tokenizer
+model_instances = get_multiple_model_and_tokenizer(model_id, n_instances)
+# Set up the round-robin iterator to distribute the requests across model instances
+model_round_robin = itertools.cycle(model_instances)
+# Load model classifiers
+# Model configs name their size attributes differently: n_head / n_layer /
+# n_embd are tried first, then num_attention_heads / num_hidden_layers /
+# hidden_size. Only the attribute lookup is guarded, so a failure while
+# loading the classifiers is reported as such instead of triggering a retry.
+_first_model = model_instances[0][0]
+try:
+    _att_size = _first_model.config.n_head * _first_model.config.n_layer
+    _emb_size = _first_model.config.n_embd
+except AttributeError:
+    _att_size = _first_model.config.num_attention_heads * _first_model.config.num_hidden_layers
+    _emb_size = _first_model.config.hidden_size
+classifier_span, classifier_token, label_map = get_classifiers_for_model(
+    _att_size, _emb_size, _first_model.device,
+    datasets[model_id][data_id][config_id]
+)
+# Pre-rendered example shown before the first generation: (HTML, result dict).
+# `css` already holds every style rule this markup uses, so the stylesheet
+# is not embedded a second time.
+initial_output = (css+"""<div class=\"generated-content\"><br><span class='highlight spanhighlight' data-tooltip-text='GPE' style='background-color: #e6550d88'> Miami<span class='small-text'>GPE</span></span> is a city in the  <span class='highlight spanhighlight' data-tooltip-text='GPE' style='background-color: #e6550d88'> U.S.<span class='small-text'>GPE</span></span> state of <span class='highlight spanhighlight' data-tooltip-text='GPE' style='background-color: #e6550d88'> Florida<span class='small-text'>GPE</span></span>, and it's also known as "  <span class='highlight spanhighlight' data-tooltip-text='WORK_OF_ART' style='background-color: #bdbdbd88'>The Magic City<span class='small-text'>WORK_OF_ART</span></span>." It was founded by  <span class='highlight spanhighlight' data-tooltip-text='PERSON' style='background-color: #bcbddc88'> Henry Flagler<span class='small-text'>PERSON</span></span> on <span class='highlight spanhighlight' data-tooltip-text='DATE' style='background-color: #6baed688'> October 28th, 1896<span class='small-text'>DATE</span></span>.
+<br></div>""", {'text': 'Miami is a city in the U.S. state of Florida, and it\'s also known as "The Magic City." It was founded by Henry Flagler on October 28th, 1896.', 'entites': [{'name': 'Miami', 'type': 'GPE'}, {'name': 'U.S.', 'type': 'GPE'}, {'name': 'Florida', 'type': 'GPE'}, {'name': 'The Magic City', 'type': 'WORK_OF_ART'}, {'name': 'Henry Flagler', 'type': 'PERSON'}, {'name': 'October 28th, 1896', 'type': 'DATE'}]})
+# UI layout: one tab with the rendered output on top, the prompt box and
+# the button below it, and a small model/device line at the bottom.
+with gr.Blocks(css="footer{display:none !important} .gradio-container {padding: 0!important; height:400px;}", fill_width=True) as demo:
+    with gr.Tab("EMBER Demo"):
+        with gr.Row():
+            # Starts out showing the pre-rendered example.
+            output_text = gr.HTML(label="Generated Text", value=initial_output[0])
+        with gr.Group():
+            with gr.Row():
+                # max_length matches the 200-character cut applied in generate_text.
+                input_text = gr.Textbox(label="Enter prompt for completion", value="Miami is", max_length=200)
+                generate_button = gr.Button("Generate", scale=0)
+        # New HTML output for model info
+        model_info_html = gr.HTML(
+            label="Model Info",
+            value=f'<div style="font-weight: lighter; text-align: center; margin-top: -1.5em; margin-bottom: -1em!important; font-size: x-small;">{model_id} running on {gpu_name}</div>'
+        )
+    # First click handler: stream the annotated completion into the output
+    # area. The concurrency limit is tied to the number of model instances.
+    generate_button.click(
+        fn=generate_text,
+        inputs=[input_text],
+        outputs=[output_text],
+        concurrency_limit=n_instances,
+        concurrency_id="queue"
+    )
+    # Function to refresh the model info HTML
+    def refresh_model_info():
+        # Rebuilds the same markup used for the initial value above.
+        return f'<div style="font-weight: lighter; text-align: center; margin-top: -1.5em; margin-bottom: -1em!important; font-size: x-small;">{model_id} running on {gpu_name}</div>'
+    # Update the model info HTML on button click
+    # Second handler on the same button, registered with queue=False.
+    generate_button.click(
+        fn=refresh_model_info,
+        inputs=[],
+        outputs=[model_info_html],
+        queue=False
+    )
+demo.queue()
+# Listen on all interfaces, port 7860.
+demo.launch(server_name="0.0.0.0", server_port=7860)

data/meta-llama/Llama-3.2-1B/STOKE_100/checkpoints/span_classifier/Rxi8b70XJA/checkpoint.pt ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:25951d9b73437a7aa344f4c207cbda2f88d9bf5fa94d1a779617948b18a1c4ed
+size 8439912

data/meta-llama/Llama-3.2-1B/STOKE_100/checkpoints/span_classifier/Rxi8b70XJA/config.json ADDED Viewed

	@@ -0,0 +1,29 @@

+{
+ "model": "meta-llama/Llama-3.2-1B",
+ "type": "span_classifier",
+ "label_map": [
+  "no_span",
+  "span"
+ ],
+ "learning_rate": 0.0003,
+ "classifier_dim": 4096,
+ "loss_weights": [
+  1.0,
+  1.0
+ ],
+ "identifier": "Rxi8b70XJA",
+ "best_f1_validation": 0.8677362203598022,
+ "best_f1_validation_classwise": {
+  "span": {
+   "p": 0.896858811378479,
+   "r": 0.8404456377029419,
+   "f": 0.867736279964447,
+   "s": 24324.0
+  },
+  "macro": {
+   "p": 0.896858811378479,
+   "r": 0.8404456377029419,
+   "f": 0.867736279964447
+  }
+ }
+}

data/meta-llama/Llama-3.2-1B/STOKE_100/checkpoints/span_classifier/Rxi8b70XJA/config_train.json ADDED Viewed

	@@ -0,0 +1,51 @@

+{
+ "path": "data/meta-llama/Llama-3.2-1B/STOKE_100",
+ "splits": [
+  "train",
+  "validation"
+ ],
+ "layers": [
+  8,
+  9,
+  10,
+  11,
+  12
+ ],
+ "hfcache": "",
+ "classifier_dims": [
+  4096
+ ],
+ "learning_rates": [
+  0.0001,
+  5e-05,
+  0.0003
+ ],
+ "cuda": true,
+ "n_steps_per_epoch": 10000,
+ "n_epochs": 30,
+ "batch_size": 8,
+ "balance_loss": false,
+ "loss_weights_span": [
+  [
+   1.0,
+   1.0
+  ],
+  [
+   1.0,
+   50.0
+  ],
+  [
+   1.0,
+   100.0
+  ]
+ ],
+ "time": 1727765390.5829365,
+ "config_dataset": {
+  "generation_kwargs": {
+   "max_new_tokens": 100,
+   "repetition_penalty": 1.2
+  },
+  "model_id": "meta-llama/Llama-3.2-1B",
+  "flair_model_name": "flair/ner-english-ontonotes-large"
+ }
+}

data/meta-llama/Llama-3.2-1B/STOKE_100/checkpoints/token_classifier/dR8xQB4ODU/checkpoint.pt ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:dce5b3038d8767430a8bba16af61ec6af67c9d1aedc75a9f34c01feebac09b6e
+size 33884328

data/meta-llama/Llama-3.2-1B/STOKE_100/checkpoints/token_classifier/dR8xQB4ODU/config.json ADDED Viewed

	@@ -0,0 +1,166 @@

+{
+ "layer": 10,
+ "model": "meta-llama/Llama-3.2-1B",
+ "type": "token_classifier",
+ "label_map": [
+  "O",
+  "CARDINAL",
+  "DATE",
+  "EVENT",
+  "FAC",
+  "GPE",
+  "LANGUAGE",
+  "LAW",
+  "LOC",
+  "MONEY",
+  "NORP",
+  "ORDINAL",
+  "ORG",
+  "PERCENT",
+  "PERSON",
+  "PRODUCT",
+  "QUANTITY",
+  "TIME",
+  "WORK_OF_ART"
+ ],
+ "learning_rate": 5e-05,
+ "classifier_dim": 4096,
+ "loss_weights": [
+  1.0,
+  1.0,
+  1.0,
+  1.0,
+  1.0,
+  1.0,
+  1.0,
+  1.0,
+  1.0,
+  1.0,
+  1.0,
+  1.0,
+  1.0,
+  1.0,
+  1.0,
+  1.0,
+  1.0,
+  1.0,
+  1.0
+ ],
+ "identifier": "dR8xQB4ODU",
+ "best_f1_validation": 0.9056437015533447,
+ "best_f1_validation_classwise": {
+  "CARDINAL": {
+   "p": 0.8679801225662231,
+   "r": 0.8777581453323364,
+   "f": 0.8728417754173279,
+   "s": 10741.0
+  },
+  "DATE": {
+   "p": 0.9519810676574707,
+   "r": 0.9389873743057251,
+   "f": 0.9454395771026611,
+   "s": 8572.0
+  },
+  "EVENT": {
+   "p": 0.8587140440940857,
+   "r": 0.8319672346115112,
+   "f": 0.8451290726661682,
+   "s": 1220.0
+  },
+  "FAC": {
+   "p": 0.8515185713768005,
+   "r": 0.8122317790985107,
+   "f": 0.8314113020896912,
+   "s": 932.0
+  },
+  "GPE": {
+   "p": 0.9000998735427856,
+   "r": 0.9094448685646057,
+   "f": 0.904748260974884,
+   "s": 6935.0
+  },
+  "LANGUAGE": {
+   "p": 0.75,
+   "r": 0.7200000286102295,
+   "f": 0.7346938848495483,
+   "s": 25.0
+  },
+  "LAW": {
+   "p": 0.8709677457809448,
+   "r": 0.73828125,
+   "f": 0.7991543412208557,
+   "s": 256.0
+  },
+  "LOC": {
+   "p": 0.8258426785469055,
+   "r": 0.7101449370384216,
+   "f": 0.7636363506317139,
+   "s": 414.0
+  },
+  "MONEY": {
+   "p": 0.876042902469635,
+   "r": 0.8626760840415955,
+   "f": 0.8693081140518188,
+   "s": 1704.0
+  },
+  "NORP": {
+   "p": 0.9160357713699341,
+   "r": 0.887333333492279,
+   "f": 0.9014561772346497,
+   "s": 1500.0
+  },
+  "ORDINAL": {
+   "p": 0.9303238391876221,
+   "r": 0.9498997926712036,
+   "f": 0.9400099515914917,
+   "s": 998.0
+  },
+  "ORG": {
+   "p": 0.8974575400352478,
+   "r": 0.8792765140533447,
+   "f": 0.8882739543914795,
+   "s": 9675.0
+  },
+  "PERCENT": {
+   "p": 0.8629592657089233,
+   "r": 0.8083720803260803,
+   "f": 0.8347742557525635,
+   "s": 1075.0
+  },
+  "PERSON": {
+   "p": 0.9707135558128357,
+   "r": 0.9713156223297119,
+   "f": 0.9710144996643066,
+   "s": 12899.0
+  },
+  "PRODUCT": {
+   "p": 0.7828418016433716,
+   "r": 0.7564767003059387,
+   "f": 0.7694334387779236,
+   "s": 386.0
+  },
+  "QUANTITY": {
+   "p": 0.8409090638160706,
+   "r": 0.7758846879005432,
+   "f": 0.8070893287658691,
+   "s": 763.0
+  },
+  "TIME": {
+   "p": 0.8710959553718567,
+   "r": 0.8373362421989441,
+   "f": 0.8538825511932373,
+   "s": 1832.0
+  },
+  "WORK_OF_ART": {
+   "p": 0.7803030014038086,
+   "r": 0.7152777910232544,
+   "f": 0.7463768124580383,
+   "s": 576.0
+  },
+  "macro": {
+   "p": 0.8669881820678711,
+   "r": 0.8323702216148376,
+   "f": 0.8488152027130127
+  }
+ }
+}

data/meta-llama/Llama-3.2-1B/STOKE_100/checkpoints/token_classifier/dR8xQB4ODU/config_train.json ADDED Viewed

	@@ -0,0 +1,51 @@

+{
+ "path": "data/meta-llama/Llama-3.2-1B/STOKE_100",
+ "splits": [
+  "train",
+  "validation"
+ ],
+ "layers": [
+  8,
+  9,
+  10,
+  11,
+  12
+ ],
+ "hfcache": "",
+ "classifier_dims": [
+  4096
+ ],
+ "learning_rates": [
+  0.0001,
+  5e-05,
+  0.0003
+ ],
+ "cuda": true,
+ "n_steps_per_epoch": 10000,
+ "n_epochs": 30,
+ "batch_size": 8,
+ "balance_loss": false,
+ "loss_weights_span": [
+  [
+   1.0,
+   1.0
+  ],
+  [
+   1.0,
+   50.0
+  ],
+  [
+   1.0,
+   100.0
+  ]
+ ],
+ "time": 1727765390.5829365,
+ "config_dataset": {
+  "generation_kwargs": {
+   "max_new_tokens": 100,
+   "repetition_penalty": 1.2
+  },
+  "model_id": "meta-llama/Llama-3.2-1B",
+  "flair_model_name": "flair/ner-english-ontonotes-large"
+ }
+}

data/meta-llama/Llama-3.2-1B/STOKE_100/checkpoints/token_classifier/pbK46jjAVx/checkpoint.pt ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:f32816959f5fd27967c754a61b07d8ae6c92b7881e2fbb6a68b54b8c0c575122
+size 33884328

data/meta-llama/Llama-3.2-1B/STOKE_100/checkpoints/token_classifier/pbK46jjAVx/config.json ADDED Viewed

	@@ -0,0 +1,166 @@

+{
+ "layer": 10,
+ "model": "meta-llama/Llama-3.2-1B",
+ "type": "token_classifier",
+ "label_map": [
+  "O",
+  "CARDINAL",
+  "DATE",
+  "EVENT",
+  "FAC",
+  "GPE",
+  "LANGUAGE",
+  "LAW",
+  "LOC",
+  "MONEY",
+  "NORP",
+  "ORDINAL",
+  "ORG",
+  "PERCENT",
+  "PERSON",
+  "PRODUCT",
+  "QUANTITY",
+  "TIME",
+  "WORK_OF_ART"
+ ],
+ "learning_rate": 0.0003,
+ "classifier_dim": 4096,
+ "loss_weights": [
+  1.0,
+  1.0,
+  1.0,
+  1.0,
+  1.0,
+  1.0,
+  1.0,
+  1.0,
+  1.0,
+  1.0,
+  1.0,
+  1.0,
+  1.0,
+  1.0,
+  1.0,
+  1.0,
+  1.0,
+  1.0,
+  1.0
+ ],
+ "identifier": "pbK46jjAVx",
+ "best_f1_validation": 0.9048610329627991,
+ "best_f1_validation_classwise": {
+  "CARDINAL": {
+   "p": 0.8730558156967163,
+   "r": 0.8727306723594666,
+   "f": 0.872893214225769,
+   "s": 10741.0
+  },
+  "DATE": {
+   "p": 0.9534441828727722,
+   "r": 0.9365375638008118,
+   "f": 0.944915235042572,
+   "s": 8572.0
+  },
+  "EVENT": {
+   "p": 0.8540268540382385,
+   "r": 0.83442622423172,
+   "f": 0.844112753868103,
+   "s": 1220.0
+  },
+  "FAC": {
+   "p": 0.8227027058601379,
+   "r": 0.8165236115455627,
+   "f": 0.8196015357971191,
+   "s": 932.0
+  },
+  "GPE": {
+   "p": 0.9014912247657776,
+   "r": 0.9065608978271484,
+   "f": 0.9040189981460571,
+   "s": 6935.0
+  },
+  "LANGUAGE": {
+   "p": 0.7272727489471436,
+   "r": 0.6399999856948853,
+   "f": 0.6808510422706604,
+   "s": 25.0
+  },
+  "LAW": {
+   "p": 0.8500000238418579,
+   "r": 0.73046875,
+   "f": 0.7857142686843872,
+   "s": 256.0
+  },
+  "LOC": {
+   "p": 0.8867924809455872,
+   "r": 0.6811594367027283,
+   "f": 0.7704918384552002,
+   "s": 414.0
+  },
+  "MONEY": {
+   "p": 0.873665452003479,
+   "r": 0.8644366264343262,
+   "f": 0.8690265417098999,
+   "s": 1704.0
+  },
+  "NORP": {
+   "p": 0.9220505356788635,
+   "r": 0.875333309173584,
+   "f": 0.898084819316864,
+   "s": 1500.0
+  },
+  "ORDINAL": {
+   "p": 0.9244186282157898,
+   "r": 0.9559118151664734,
+   "f": 0.9399014711380005,
+   "s": 998.0
+  },
+  "ORG": {
+   "p": 0.8920637965202332,
+   "r": 0.8841343522071838,
+   "f": 0.888081431388855,
+   "s": 9675.0
+  },
+  "PERCENT": {
+   "p": 0.8530852198600769,
+   "r": 0.8102325797080994,
+   "f": 0.8311069011688232,
+   "s": 1075.0
+  },
+  "PERSON": {
+   "p": 0.9692212343215942,
+   "r": 0.9716256856918335,
+   "f": 0.9704219698905945,
+   "s": 12899.0
+  },
+  "PRODUCT": {
+   "p": 0.7886179089546204,
+   "r": 0.7538859844207764,
+   "f": 0.7708609104156494,
+   "s": 386.0
+  },
+  "QUANTITY": {
+   "p": 0.8215258717536926,
+   "r": 0.7903014421463013,
+   "f": 0.8056111931800842,
+   "s": 763.0
+  },
+  "TIME": {
+   "p": 0.8752886652946472,
+   "r": 0.8275108933448792,
+   "f": 0.8507295250892639,
+   "s": 1832.0
+  },
+  "WORK_OF_ART": {
+   "p": 0.7937743067741394,
+   "r": 0.7083333134651184,
+   "f": 0.7486238479614258,
+   "s": 576.0
+  },
+  "macro": {
+   "p": 0.8656943440437317,
+   "r": 0.8255618214607239,
+   "f": 0.8441693186759949
+  }
+ }
+}

data/meta-llama/Llama-3.2-1B/STOKE_100/checkpoints/token_classifier/pbK46jjAVx/config_train.json ADDED Viewed

	@@ -0,0 +1,51 @@

+{
+ "path": "data/meta-llama/Llama-3.2-1B/STOKE_100",
+ "splits": [
+  "train",
+  "validation"
+ ],
+ "layers": [
+  8,
+  9,
+  10,
+  11,
+  12
+ ],
+ "hfcache": "",
+ "classifier_dims": [
+  4096
+ ],
+ "learning_rates": [
+  0.0001,
+  5e-05,
+  0.0003
+ ],
+ "cuda": true,
+ "n_steps_per_epoch": 10000,
+ "n_epochs": 30,
+ "batch_size": 8,
+ "balance_loss": false,
+ "loss_weights_span": [
+  [
+   1.0,
+   1.0
+  ],
+  [
+   1.0,
+   50.0
+  ],
+  [
+   1.0,
+   100.0
+  ]
+ ],
+ "time": 1727765390.5829365,
+ "config_dataset": {
+  "generation_kwargs": {
+   "max_new_tokens": 100,
+   "repetition_penalty": 1.2
+  },
+  "model_id": "meta-llama/Llama-3.2-1B",
+  "flair_model_name": "flair/ner-english-ontonotes-large"
+ }
+}

data/meta-llama/Llama-3.2-1B/STOKE_100/config.json ADDED Viewed

	@@ -0,0 +1,8 @@

+{
+ "generation_kwargs": {
+  "max_new_tokens": 100,
+  "repetition_penalty": 1.2
+ },
+ "model_id": "meta-llama/Llama-3.2-1B",
+ "flair_model_name": "flair/ner-english-ontonotes-large"
+}

data/meta-llama/Llama-3.2-1B/STOKE_100/stoke_config.json ADDED Viewed

	@@ -0,0 +1,10 @@

+{
+    "default": {
+        "classifier_token": "data/meta-llama/Llama-3.2-1B/STOKE_100/checkpoints/token_classifier/pbK46jjAVx",
+        "classifier_span": "data/meta-llama/Llama-3.2-1B/STOKE_100/checkpoints/span_classifier/Rxi8b70XJA"
+    },
+    "basic": {
+        "classifier_token": "data/meta-llama/Llama-3.2-1B/STOKE_100/checkpoints/token_classifier/dR8xQB4ODU",
+        "classifier_span": "data/meta-llama/Llama-3.2-1B/STOKE_100/checkpoints/span_classifier/Rxi8b70XJA"
+    }
+}

requirements.txt ADDED Viewed

	@@ -0,0 +1,8 @@

+git+https://github.com/nicpopovic/transformers.git@4.45-STOKE
+torch
+matplotlib
+flair
+nltk
+datasets
+torcheval
+gradio