Gosse Minnema committed on
Commit ·
b8ce02e
1
Parent(s): bfd79cd
Re-enable data-loading, change port
Browse files
- Dockerfile +3 -3
- README.md +1 -0
- sociofillmore/__pycache__/__init__.cpython-39.pyc +0 -0
- sociofillmore/common/__pycache__/__init__.cpython-39.pyc +0 -0
- sociofillmore/common/__pycache__/analyze_text.cpython-39.pyc +0 -0
- sociofillmore/common/analyze_text.py +1 -1
- sociofillmore/webapp/__pycache__/__init__.cpython-39.pyc +0 -0
- sociofillmore/webapp/__pycache__/app.cpython-39.pyc +0 -0
- sociofillmore/webapp/app.py +14 -32
Dockerfile
CHANGED
|
@@ -13,9 +13,9 @@ RUN chmod -R 777 /.local
|
|
| 13 |
RUN curl -s https://packagecloud.io/install/repositories/github/git-lfs/script.deb.sh | bash
|
| 14 |
RUN apt-get install git-lfs
|
| 15 |
RUN git lfs install
|
| 16 |
-
|
| 17 |
-
|
| 18 |
-
|
| 19 |
|
| 20 |
RUN pip install -v -r requirements.txt
|
| 21 |
CMD ["sh", "docker_commands.sh"]
|
|
|
|
| 13 |
RUN curl -s https://packagecloud.io/install/repositories/github/git-lfs/script.deb.sh | bash
|
| 14 |
RUN apt-get install git-lfs
|
| 15 |
RUN git lfs install
|
| 16 |
+
RUN --mount=type=secret,id=HF_TOKEN,mode=0444,required=true \
|
| 17 |
+
git clone https://gossminn:$(cat /run/secrets/HF_TOKEN)@huggingface.co/datasets/responsibility-framing/sociofillmore-datasets /.cache/sfdata
|
| 18 |
+
RUN chmod -R 777 /.cache/sfdata
|
| 19 |
|
| 20 |
RUN pip install -v -r requirements.txt
|
| 21 |
CMD ["sh", "docker_commands.sh"]
|
README.md
CHANGED
|
@@ -5,6 +5,7 @@ colorFrom: yellow
|
|
| 5 |
colorTo: red
|
| 6 |
sdk: docker
|
| 7 |
pinned: false
|
|
|
|
| 8 |
---
|
| 9 |
|
| 10 |
Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference
|
|
|
|
| 5 |
colorTo: red
|
| 6 |
sdk: docker
|
| 7 |
pinned: false
|
| 8 |
+
app_port: 5000
|
| 9 |
---
|
| 10 |
|
| 11 |
Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference
|
sociofillmore/__pycache__/__init__.cpython-39.pyc
CHANGED
|
Binary files a/sociofillmore/__pycache__/__init__.cpython-39.pyc and b/sociofillmore/__pycache__/__init__.cpython-39.pyc differ
|
|
|
sociofillmore/common/__pycache__/__init__.cpython-39.pyc
CHANGED
|
Binary files a/sociofillmore/common/__pycache__/__init__.cpython-39.pyc and b/sociofillmore/common/__pycache__/__init__.cpython-39.pyc differ
|
|
|
sociofillmore/common/__pycache__/analyze_text.cpython-39.pyc
CHANGED
|
Binary files a/sociofillmore/common/__pycache__/analyze_text.cpython-39.pyc and b/sociofillmore/common/__pycache__/analyze_text.cpython-39.pyc differ
|
|
|
sociofillmore/common/analyze_text.py
CHANGED
|
@@ -16,7 +16,7 @@ from nltk.corpus import framenet as fn
|
|
| 16 |
from nltk.corpus.reader.framenet import FramenetError
|
| 17 |
from spacy.tokens import Token
|
| 18 |
|
| 19 |
-
|
| 20 |
|
| 21 |
ITALIAN_ACTIVE_AUX = ["avere", "ha", "ho", "hai", "avete", "hanno", "abbiamo"]
|
| 22 |
DUTCH_ACTIVE_AUX = ["heb", "hebben", "heeft"]
|
|
|
|
| 16 |
from nltk.corpus.reader.framenet import FramenetError
|
| 17 |
from spacy.tokens import Token
|
| 18 |
|
| 19 |
+
from sociofillmore.crashes.utils import is_a_dutch_text
|
| 20 |
|
| 21 |
ITALIAN_ACTIVE_AUX = ["avere", "ha", "ho", "hai", "avete", "hanno", "abbiamo"]
|
| 22 |
DUTCH_ACTIVE_AUX = ["heb", "hebben", "heeft"]
|
sociofillmore/webapp/__pycache__/__init__.cpython-39.pyc
CHANGED
|
Binary files a/sociofillmore/webapp/__pycache__/__init__.cpython-39.pyc and b/sociofillmore/webapp/__pycache__/__init__.cpython-39.pyc differ
|
|
|
sociofillmore/webapp/__pycache__/app.cpython-39.pyc
CHANGED
|
Binary files a/sociofillmore/webapp/__pycache__/app.cpython-39.pyc and b/sociofillmore/webapp/__pycache__/app.cpython-39.pyc differ
|
|
|
sociofillmore/webapp/app.py
CHANGED
|
@@ -42,14 +42,14 @@ from sociofillmore.common.analyze_text import (
|
|
| 42 |
get_tarball_blocks,
|
| 43 |
analyze_external_file
|
| 44 |
)
|
| 45 |
-
|
| 46 |
|
| 47 |
|
| 48 |
-
#
|
| 49 |
-
|
| 50 |
-
|
| 51 |
-
|
| 52 |
-
|
| 53 |
|
| 54 |
# security (very basic!)
|
| 55 |
PROTECTED_DATASETS = [] # "femicides/rai"
|
|
@@ -68,9 +68,6 @@ else:
|
|
| 68 |
print("Defining app...")
|
| 69 |
app = Flask(__name__)
|
| 70 |
app.secret_key = SECRET_KEY
|
| 71 |
-
# app.config["SESSION_PERMANENT"] = False
|
| 72 |
-
# app.config["SESSION_TYPE"] = "filesystem"
|
| 73 |
-
# Session(app)
|
| 74 |
|
| 75 |
# gensim & spacy models
|
| 76 |
def load_gensim_model(limit):
|
|
@@ -78,7 +75,7 @@ def load_gensim_model(limit):
|
|
| 78 |
return gensim.models.word2vec.KeyedVectors.load_word2vec_format("data/embeddings/concat_glove_frames.w2v.txt", limit=limit)
|
| 79 |
|
| 80 |
gensim_m = None
|
| 81 |
-
|
| 82 |
|
| 83 |
print("Loading SpaCy models...")
|
| 84 |
spacy_model_ud = spacy.load("xx_sent_ud_sm")
|
|
@@ -171,18 +168,11 @@ def read_crashes_provider_attrs():
|
|
| 171 |
|
| 172 |
return attr_map, name_map
|
| 173 |
|
| 174 |
-
# PROVIDER_ATTRS = {
|
| 175 |
-
# "femicides/rai": read_rai_provider_attrs(),
|
| 176 |
-
# "femicides/olv": {},
|
| 177 |
-
# "crashes/thecrashes": read_crashes_provider_attrs()[0],
|
| 178 |
-
# "migration/pavia": read_migration_provider_attrs()
|
| 179 |
-
# }
|
| 180 |
-
|
| 181 |
PROVIDER_ATTRS = {
|
| 182 |
-
"femicides/rai":
|
| 183 |
"femicides/olv": {},
|
| 184 |
-
"crashes/thecrashes":
|
| 185 |
-
"migration/pavia":
|
| 186 |
}
|
| 187 |
|
| 188 |
|
|
@@ -229,20 +219,12 @@ def load_event_data(dataset):
|
|
| 229 |
|
| 230 |
|
| 231 |
DATASETS = {
|
| 232 |
-
"femicides/rai":
|
| 233 |
-
"femicides/olv":
|
| 234 |
-
"crashes/thecrashes":
|
| 235 |
-
"migration/pavia":
|
| 236 |
}
|
| 237 |
|
| 238 |
-
|
| 239 |
-
# DATASETS = {
|
| 240 |
-
# "femicides/rai": load_event_data("femicides/rai"),
|
| 241 |
-
# "femicides/olv": load_event_data("femicides/olv"),
|
| 242 |
-
# "crashes/thecrashes": load_event_data("crashes/thecrashes"),
|
| 243 |
-
# "migration/pavia": load_event_data("migration/pavia"),
|
| 244 |
-
# }
|
| 245 |
-
|
| 246 |
SKIP_FUNCTIONS = {
|
| 247 |
"femicides/rai": None,
|
| 248 |
"femicides/olv": None,
|
|
|
|
| 42 |
get_tarball_blocks,
|
| 43 |
analyze_external_file
|
| 44 |
)
|
| 45 |
+
from sociofillmore.crashes.utils import is_a_dutch_text
|
| 46 |
|
| 47 |
|
| 48 |
+
# download nltk packages if needed
|
| 49 |
+
if sys.argv[2] != "local":
|
| 50 |
+
nltk.download("framenet_v17", download_dir="/nltk_data")
|
| 51 |
+
nltk.download("punkt", download_dir="/nltk_data")
|
| 52 |
+
print("Done!")
|
| 53 |
|
| 54 |
# security (very basic!)
|
| 55 |
PROTECTED_DATASETS = [] # "femicides/rai"
|
|
|
|
| 68 |
print("Defining app...")
|
| 69 |
app = Flask(__name__)
|
| 70 |
app.secret_key = SECRET_KEY
|
|
|
|
|
|
|
|
|
|
| 71 |
|
| 72 |
# gensim & spacy models
|
| 73 |
def load_gensim_model(limit):
|
|
|
|
| 75 |
return gensim.models.word2vec.KeyedVectors.load_word2vec_format("data/embeddings/concat_glove_frames.w2v.txt", limit=limit)
|
| 76 |
|
| 77 |
gensim_m = None
|
| 78 |
+
gensim_m = load_gensim_model(100_000)
|
| 79 |
|
| 80 |
print("Loading SpaCy models...")
|
| 81 |
spacy_model_ud = spacy.load("xx_sent_ud_sm")
|
|
|
|
| 168 |
|
| 169 |
return attr_map, name_map
|
| 170 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 171 |
PROVIDER_ATTRS = {
|
| 172 |
+
"femicides/rai": read_rai_provider_attrs(),
|
| 173 |
"femicides/olv": {},
|
| 174 |
+
"crashes/thecrashes": read_crashes_provider_attrs()[0],
|
| 175 |
+
"migration/pavia": read_migration_provider_attrs()
|
| 176 |
}
|
| 177 |
|
| 178 |
|
|
|
|
| 219 |
|
| 220 |
|
| 221 |
DATASETS = {
|
| 222 |
+
"femicides/rai": load_event_data("femicides/rai"),
|
| 223 |
+
"femicides/olv": load_event_data("femicides/olv"),
|
| 224 |
+
"crashes/thecrashes": load_event_data("crashes/thecrashes"),
|
| 225 |
+
"migration/pavia": load_event_data("migration/pavia"),
|
| 226 |
}
|
| 227 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 228 |
SKIP_FUNCTIONS = {
|
| 229 |
"femicides/rai": None,
|
| 230 |
"femicides/olv": None,
|