Spaces:
Runtime error
Runtime error
Nick Canu
committed on
Commit
·
ae88252
1
Parent(s):
25bc366
spacy fix
Browse files
- Home.py +1 -0
- description_generator.py +2 -3
- requirements.txt +1 -3
- title_generator.py +3 -3
Home.py
CHANGED
|
@@ -160,6 +160,7 @@ def application():
|
|
| 160 |
###Models
|
| 161 |
@st.cache_resource
|
| 162 |
def setup_models():
|
|
|
|
| 163 |
return Title_Generator('./t5_model', slim_df), input_manager(vector_df, slim_df, search_tokens), model_control(apikey=st.secrets.key,model_id=st.secrets.model)
|
| 164 |
|
| 165 |
Tgen, iman, mctrl = setup_models()
|
|
|
|
| 160 |
###Models
|
| 161 |
@st.cache_resource
|
| 162 |
def setup_models():
|
| 163 |
+
spacy.cli.download("en_core_web_md")
|
| 164 |
return Title_Generator('./t5_model', slim_df), input_manager(vector_df, slim_df, search_tokens), model_control(apikey=st.secrets.key,model_id=st.secrets.model)
|
| 165 |
|
| 166 |
Tgen, iman, mctrl = setup_models()
|
description_generator.py
CHANGED
|
@@ -7,13 +7,12 @@ from operator import itemgetter
|
|
| 7 |
#user input manager class
|
| 8 |
class input_manager:
|
| 9 |
|
| 10 |
-
#initialize key dictionary from vector data frame
|
| 11 |
-
def __init__(self,key_df, slim_df, search_tokens
|
| 12 |
self.key_df = key_df
|
| 13 |
self.slim_df = slim_df
|
| 14 |
self.search_tokens = search_tokens
|
| 15 |
self.key = dict(zip(list(key_df.columns),np.zeros(len(key_df.columns))))
|
| 16 |
-
self.top_n = top_n
|
| 17 |
self.nlp = spacy.load("en_core_web_md")
|
| 18 |
|
| 19 |
#translate input text to vector
|
|
|
|
| 7 |
#user input manager class
|
| 8 |
class input_manager:
|
| 9 |
|
| 10 |
+
#initialize key dictionary from vector data frame
|
| 11 |
+
def __init__(self,key_df, slim_df, search_tokens):
|
| 12 |
self.key_df = key_df
|
| 13 |
self.slim_df = slim_df
|
| 14 |
self.search_tokens = search_tokens
|
| 15 |
self.key = dict(zip(list(key_df.columns),np.zeros(len(key_df.columns))))
|
|
|
|
| 16 |
self.nlp = spacy.load("en_core_web_md")
|
| 17 |
|
| 18 |
#translate input text to vector
|
requirements.txt
CHANGED
|
@@ -8,6 +8,4 @@ sentencepiece==0.1.97
|
|
| 8 |
spacy==3.5.1
|
| 9 |
streamlit==1.20.0
|
| 10 |
torch==2.0.0
|
| 11 |
-
transformers==4.27.3
|
| 12 |
-
en_core_web_sm @ https://github.com/explosion/spacy-models/releases/download/en_core_web_sm-3.4.0/en_core_web_sm-3.4.0-py3-none-any.whl
|
| 13 |
-
en_core_web_md @ https://github.com/explosion/spacy-models/releases/download/en_core_web_md-3.4.0/en_core_web_md-3.4.0-py3-none-any.whl
|
|
|
|
| 8 |
spacy==3.5.1
|
| 9 |
streamlit==1.20.0
|
| 10 |
torch==2.0.0
|
| 11 |
+
transformers==4.27.3
|
|
|
|
|
|
title_generator.py
CHANGED
|
@@ -5,10 +5,12 @@ from gensim.parsing import preprocess_string, strip_tags, strip_numeric, strip_m
|
|
| 5 |
import spacy
|
| 6 |
import torch
|
| 7 |
from transformers import T5ForConditionalGeneration,T5Tokenizer
|
|
|
|
|
|
|
| 8 |
|
| 9 |
#Custom text tokenizer from https://github.com/canunj/deconstructing_games by N Canu & K Chen
|
| 10 |
def doc_text_preprocessing(ser):
|
| 11 |
-
nlp=spacy.load("
|
| 12 |
|
| 13 |
"""text processing steps"""
|
| 14 |
import re
|
|
@@ -68,8 +70,6 @@ class Title_Generator:
|
|
| 68 |
return candidates, description
|
| 69 |
|
| 70 |
def candidate_score(self,candidates,ex_check=None):
|
| 71 |
-
import random
|
| 72 |
-
from operator import itemgetter
|
| 73 |
|
| 74 |
if ex_check != None:
|
| 75 |
pat = re.compile("((?:" + "|".join(map(re.escape, candidates[0]+[cand.upper() for cand in candidates[0]])) + "|" + "|".join(ex_check) +"))")
|
|
|
|
| 5 |
import spacy
|
| 6 |
import torch
|
| 7 |
from transformers import T5ForConditionalGeneration,T5Tokenizer
|
| 8 |
+
import random
|
| 9 |
+
import itemgetter
|
| 10 |
|
| 11 |
#Custom text tokenizer from https://github.com/canunj/deconstructing_games by N Canu & K Chen
|
| 12 |
def doc_text_preprocessing(ser):
|
| 13 |
+
nlp=spacy.load("en_core_web_md", exclude=['parser','ner','textcat'])
|
| 14 |
|
| 15 |
"""text processing steps"""
|
| 16 |
import re
|
|
|
|
| 70 |
return candidates, description
|
| 71 |
|
| 72 |
def candidate_score(self,candidates,ex_check=None):
|
|
|
|
|
|
|
| 73 |
|
| 74 |
if ex_check != None:
|
| 75 |
pat = re.compile("((?:" + "|".join(map(re.escape, candidates[0]+[cand.upper() for cand in candidates[0]])) + "|" + "|".join(ex_check) +"))")
|