Spaces:
Build error
Build error
Commit ·
3347c60
1
Parent(s): 5716061
Create app.py
Browse files
app.py
ADDED
|
@@ -0,0 +1,102 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import spacy
|
| 2 |
+
import numpy as np
|
| 3 |
+
|
| 4 |
+
# Load the spaCy model that provides the word vectors used below.
# The model is downloaded only when it is not installed yet: downloading
# unconditionally on every start-up is slow and requires network access
# even when the package is already available locally.
try:
    nlp = spacy.load('en_core_web_lg')
except OSError:
    # spacy.load raises OSError when the model package cannot be found.
    spacy.cli.download("en_core_web_lg")
    nlp = spacy.load('en_core_web_lg')
|
| 9 |
+
|
| 10 |
+
|
| 11 |
+
# Single-word terms that signal an earthquake-related query
earthquake_single_keywords = [
    'earthquake', 'seismic', 'tremor', 'quake',
    'aftershock', 'seismology', 'tectonic', 'plate',
    'seismometer', 'shake', 'temblor', 'trembler',
    'seism', 'shock', 'vibration', 'groundswell',
]

# One vector per single-word keyword, in the same order as the list above
earthquake_single_embeddings = [
    keyword_doc.vector for keyword_doc in map(nlp, earthquake_single_keywords)
]
|
| 16 |
+
|
| 17 |
+
# Multi-word phrases that signal an earthquake-related query
# NOTE: 'seismic reflection' is listed twice; the duplicate is kept as-is.
earthquake_multi_keywords = [
    'seismic activity',
    'earthquake risk',
    'earthquake zone',
    'seismic waves',
    'earthquake damage',
    'seismic shift',
    'tectonic plates',
    'fault line',
    'seismic retrofitting',
    'seismic hazard',
    'aftershock sequence',
    'earthquake drill',
    'seismic reflection',
    'plate tectonics',
    'seismic reflection imaging',
    'seismic tomography',
    'seismic profiling',
    'seismic energy release',
    'seismicity pattern',
    'earthquake swarm',
    'seismic gap',
    'seismic inversion',
    'seismic reflection',
    'seismic scattering',
    'seismic attenuation',
    'seismic imaging',
    'seismic map',
    'seismic data',
    'earthquake monitoring',
    'seismic data analysis',
    'earth shaking',
]

# Each phrase is represented by the mean of the vectors of its
# whitespace-separated words, embedded one word at a time.
earthquake_multi_embeddings = [
    np.mean([nlp(part).vector for part in phrase.split()], axis=0)
    for phrase in earthquake_multi_keywords
]
|
| 26 |
+
|
| 27 |
+
|
| 28 |
+
# Define a function to compute the semantic similarity between a word and a set of embeddings
|
| 29 |
+
def compute_similarity_earthquake(word, embeddings, excluded_keywords, threshold=0.65):
    """
    Decide whether a word or short phrase is close to any reference embedding.

    Args:
        word: Text to score. It is embedded with the module-level ``nlp``
            pipeline.
        embeddings: Iterable of reference vectors to compare against.
        excluded_keywords: Collection of texts that must never match. The
            check is an exact membership test on ``word`` and happens before
            any embedding is computed.
        threshold: Value the best cosine similarity must strictly exceed.
            Defaults to 0.65.

    Returns:
        bool: True when the highest cosine similarity between ``word`` and
        the reference vectors is above ``threshold``. False when ``word`` is
        excluded, when ``word`` has an all-zero vector, or when no reference
        vector with a non-zero norm is available.
    """
    # Excluded texts are rejected outright, without embedding them
    if word in excluded_keywords:
        return False

    word_emb = nlp(word).vector
    word_norm = np.linalg.norm(word_emb)

    # An all-zero vector has no direction: the cosine would be 0/0 (NaN plus a
    # RuntimeWarning), so treat it as "no match" instead.
    if word_norm == 0:
        return False

    best_score = None
    for emb in embeddings:
        emb_norm = np.linalg.norm(emb)
        # Skip zero-norm references: their NaN score would make max()
        # order-dependent and could hide genuine matches.
        if emb_norm == 0:
            continue
        score = np.dot(word_emb, emb) / (word_norm * emb_norm)
        if best_score is None or score > best_score:
            best_score = score

    # No usable reference vector means nothing to match against
    if best_score is None:
        return False

    return bool(best_score > threshold)
|
| 46 |
+
|
| 47 |
+
|
| 48 |
+
def _has_matching_ngram(doc, size, excluded_keywords):
    """Return True if any run of ``size`` consecutive tokens in ``doc`` matches a multi-word keyword."""
    for start in range(len(doc) - size + 1):
        ngram = doc[start:start + size].text.lower()
        if compute_similarity_earthquake(ngram, earthquake_multi_embeddings, excluded_keywords):
            return True
    return False


def identify_earthquake_event(input_sentence):
    """
    Detect whether a sentence refers to an earthquake event.

    Every token is first compared with the single-word keyword embeddings.
    Only if none matches are the 2-grams, and then the 3-grams, compared with
    the multi-word keyword embeddings.

    Args:
        input_sentence: The text to analyse.

    Returns:
        dict: ``{'earthquaqe_event': bool}``. The key spelling is kept
        unchanged because it is part of the emitted output.
    """
    # Define excluded keywords to ignore (a query such as "I want bars with
    # magnitude 6" was otherwise reported as earthquake-related).
    # NOTE(review): the exclusion is an exact match on the scored text, so an
    # n-gram that merely contains one of these words is still scored.
    excluded_keywords = ['magnitude', 'richter', 'moment', 'scale', 'intensity', 'amplitude', 'energy', 'force', 'power', 'seismicity']

    # Parse the sentence once and reuse the result for tokens and n-grams,
    # instead of re-running the pipeline on every loop iteration.
    doc = nlp(input_sentence)

    # Check for single-word earthquake-related keywords (stops at first match)
    is_earthquake_related = any(
        compute_similarity_earthquake(token.text.lower(), earthquake_single_embeddings, excluded_keywords)
        for token in doc
    )

    # If no single-word keyword matched, fall back to 2-grams, then 3-grams
    if not is_earthquake_related:
        is_earthquake_related = (
            _has_matching_ngram(doc, 2, excluded_keywords)
            or _has_matching_ngram(doc, 3, excluded_keywords)
        )

    return {'earthquaqe_event': is_earthquake_related}
|
| 78 |
+
|
| 79 |
+
|
| 80 |
+
from transformers import pipeline
|
| 81 |
+
import gradio as gr
|
| 82 |
+
|
| 83 |
+
# Texts shown at the top of the demo page
title = "Natural Language module Demo for Earthquaqe events identification"
description = "This is a simple demo just for demonstration purposes, so that Serco team might have the chance to validate the results of the Natural Language module concerning the earthquaqe event identification, while in progress"

# Sample queries offered in the UI; each inner list holds the value for the
# single text input.
examples = [
    ["I want all earthquakes larger than 5.0 that occurred in Rome during 3/5/20"],
    ["I want all seism larger than 5.0 that occurred in Rome between January 2020 and February of the same year"],
    ["earth shakes located in Ishkoshim"],
    ["give me all the bars with magnitude above than 6 in the region of Athens for the month of January, 1990"]
]

# Build and start the demo. Queuing is enabled through .queue() rather than
# the `enable_queue` constructor argument, which newer Gradio releases
# deprecate or no longer accept.
gr.Interface(
    fn=identify_earthquake_event,
    inputs="text",
    outputs="text",
    title=title,
    description=description,
    examples=examples,
).queue().launch()
|