Delete pages/Sequence Classification.py
Browse files- pages/Sequence Classification.py +0 -137
pages/Sequence Classification.py
DELETED
|
@@ -1,137 +0,0 @@
|
|
| 1 |
-
import streamlit as st
|
| 2 |
-
import sparknlp
|
| 3 |
-
|
| 4 |
-
from sparknlp.base import *
|
| 5 |
-
from sparknlp.annotator import *
|
| 6 |
-
from pyspark.ml import Pipeline
|
| 7 |
-
from annotated_text import annotated_text
|
| 8 |
-
|
| 9 |
-
# Configure the Streamlit page; must run before any other st.* call.
st.set_page_config(layout="wide", initial_sidebar_state="auto")
|
| 14 |
-
|
| 15 |
-
# Inject the page's custom CSS: centered blue title plus light-grey info sections.
_PAGE_CSS = """
<style>
    .main-title {
        font-size: 36px;
        color: #4A90E2;
        font-weight: bold;
        text-align: center;
    }
    .section {
        background-color: #f9f9f9;
        padding: 10px;
        border-radius: 10px;
        margin-top: 10px;
    }
    .section p, .section ul {
        color: #666666;
    }
</style>
"""
st.markdown(_PAGE_CSS, unsafe_allow_html=True)
|
| 35 |
-
|
| 36 |
-
@st.cache_resource
def init_spark():
    """Start (or reuse) the Spark NLP session; cached across Streamlit reruns."""
    session = sparknlp.start()
    return session
|
| 39 |
-
|
| 40 |
-
@st.cache_resource
def create_pipeline():
    """Assemble the Spark NLP pipeline: raw text -> document -> tokens -> class.

    Uses the pretrained XLM-RoBERTa IMDb sequence classifier; cached so the
    model is downloaded/loaded only once per session.
    """
    assembler = (
        DocumentAssembler()
        .setInputCol('text')
        .setOutputCol('document')
    )

    token_stage = (
        Tokenizer()
        .setInputCols(['document'])
        .setOutputCol('token')
    )

    classifier = (
        XlmRoBertaForSequenceClassification
        .pretrained('xlm_roberta_base_sequence_classifier_imdb', 'en')
        .setInputCols(["document", "token"])
        .setOutputCol("class")
    )

    return Pipeline(stages=[assembler, token_stage, classifier])
|
| 56 |
-
|
| 57 |
-
def fit_data(pipeline, data):
    """Fit `pipeline` on an empty frame and annotate `data` via a LightPipeline.

    NOTE(review): relies on the module-level `spark` session created later in
    this script — the function must only be called after `init_spark()` ran.
    """
    blank_df = spark.createDataFrame([['']]).toDF('text')
    fitted_model = pipeline.fit(blank_df)
    light = LightPipeline(fitted_model)
    return light.fullAnnotate(data)
|
| 63 |
-
|
| 64 |
-
def annotate(data):
    """Render the document text with its NER chunks highlighted inline.

    `data` maps "Document" to the full text and "NER Chunk"/"NER Label" to
    parallel lists of matched substrings and their labels.

    Bug fix: the original indexed `parts[1]` unconditionally, which raised
    IndexError whenever a chunk was not found in the remaining text
    (`str.split(chunk, 1)` then returns a single-element list). Missing
    chunks are now skipped instead of crashing the page.
    """
    document, chunks, labels = data["Document"], data["NER Chunk"], data["NER Label"]
    annotated_words = []
    for chunk, label in zip(chunks, labels):
        parts = document.split(chunk, 1)
        if len(parts) < 2:
            # Chunk absent from the remaining text — skip it.
            continue
        if parts[0]:
            annotated_words.append(parts[0])
        annotated_words.append((chunk, label))
        document = parts[1]
    if document:
        annotated_words.append(document)
    annotated_text(*annotated_words)
|
| 76 |
-
|
| 77 |
-
# Task -> its available pretrained models plus a user-facing description.
_SEQUENCE_CLASSIFICATION_INFO = {
    "models": ["xlm_roberta_base_sequence_classifier_imdb"],
    "description": "The 'xlm_roberta_base_sequence_classifier_imdb' model is specialized for sentiment analysis of movie reviews. It accurately classifies IMDb reviews as positive or negative, leveraging the multilingual capabilities of XLM-RoBERTa to analyze text content and sentiment across different languages."
}

tasks_models_descriptions = {
    "Sequence Classification": _SEQUENCE_CLASSIFICATION_INFO,
}
|
| 83 |
-
|
| 84 |
-
# --- Sidebar: task/model pickers plus the reference-notebook Colab badge ---
task = st.sidebar.selectbox("Choose the task", list(tasks_models_descriptions.keys()))
model = st.sidebar.selectbox(
    "Choose the pretrained model",
    tasks_models_descriptions[task]["models"],
    help="For more info about the models visit: https://sparknlp.org/models",
)

link = """
<a href="https://github.com/JohnSnowLabs/spark-nlp-workshop/blob/357691d18373d6e8f13b5b1015137a398fd0a45f/Spark_NLP_Udemy_MOOC/Open_Source/17.01.Transformers-based_Embeddings.ipynb#L103">
    <img src="https://colab.research.google.com/assets/colab-badge.svg" style="zoom: 1.3" alt="Open In Colab"/>
</a>
"""
st.sidebar.markdown('Reference notebook:')
st.sidebar.markdown(link, unsafe_allow_html=True)
|
| 96 |
-
|
| 97 |
-
# --- Main page header ---
# BUG FIX: the title previously read "DeBERTa for {task}" although the
# pipeline serves an XLM-RoBERTa model — name the architecture actually used.
title = f'XLM-RoBERTa for {task}'
sub_title = tasks_models_descriptions[task]["description"]
st.markdown(f'<div class="main-title">{title}</div>', unsafe_allow_html=True)
container = st.container(border=True)
container.write(sub_title)
|
| 102 |
-
|
| 103 |
-
# Canned example movie reviews (positive / negative / mixed) for the demo.
_SEQ_CLS_EXAMPLES = [
    "This movie was absolutely fantastic! The storyline was gripping, the characters were well-developed, and the cinematography was stunning. I was on the edge of my seat the entire time.",
    "A heartwarming and beautiful film. The performances were top-notch, and the direction was flawless. This is easily one of the best movies I've seen this year.",
    "What a delightful surprise! The humor was spot on, and the plot was refreshingly original. The cast did an amazing job bringing the characters to life. Highly recommended!",
    "This was one of the worst movies I’ve ever seen. The plot was predictable, the acting was wooden, and the pacing was painfully slow. I couldn’t wait for it to end.",
    "A complete waste of time. The movie lacked any real substance or direction, and the dialogue was cringe-worthy. I wouldn’t recommend this to anyone.",
    "I had high hopes for this film, but it turned out to be a huge disappointment. The story was disjointed, and the special effects were laughably bad. Don’t bother watching this one.",
    "The movie was okay, but nothing special. It had a few good moments, but overall, it felt pretty average. Not something I would watch again, but it wasn’t terrible either.",
    "An average film with a decent plot. The acting was passable, but it didn't leave much of an impression on me. It's a movie you might watch once and forget about.",
    "This movie was neither good nor bad, just kind of there. It had some interesting ideas, but they weren’t executed very well. It’s a film you could take or leave.",
]

examples_mapping = {
    "Sequence Classification": _SEQ_CLS_EXAMPLES,
}
|
| 117 |
-
|
| 118 |
-
# Let the user pick a canned example or type a custom sentence instead.
examples = examples_mapping[task]
selected_text = st.selectbox("Select an example", examples)
custom_input = st.text_input("Try it with your own Sentence!")
|
| 121 |
-
|
| 122 |
-
# Prefer the user's custom sentence over the dropdown selection and show a
# preview of the text that will be classified.
# BUG FIX: the original used a bare `except:`, which also swallows
# SystemExit/KeyboardInterrupt; narrowed to Exception.
try:
    text_to_analyze = custom_input if custom_input else selected_text
    st.subheader('Full example text')
    HTML_WRAPPER = """<div class="scroll entities" style="overflow-x: auto; border: 1px solid #e6e9ef; border-radius: 0.25rem; padding: 1rem; margin-bottom: 2.5rem; white-space:pre-wrap">{}</div>"""
    st.markdown(HTML_WRAPPER.format(text_to_analyze), unsafe_allow_html=True)
except Exception:
    # Best-effort preview: fall back to the dropdown selection if rendering fails.
    text_to_analyze = selected_text
|
| 129 |
-
|
| 130 |
-
# --- Start Spark, build the pipeline, and classify the chosen text ---
spark = init_spark()
pipeline = create_pipeline()
output = fit_data(pipeline, text_to_analyze)
|
| 134 |
-
|
| 135 |
-
# Show the predicted class of the single annotated result.
st.subheader("Prediction:")
predicted_label = output[0]['class'][0].result
st.markdown(f"Classified as : **{predicted_label}**")
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|