# Hugging Face Spaces app (the scraped page header read "Spaces: Runtime error").
import nltk
from bs4 import BeautifulSoup
import joblib
import gradio as gr

# NLTK corpora required by the cleaning pipeline below
# (stopword removal and WordNet lemmatization).
nltk.download('stopwords')
nltk.download('wordnet')
# Tag vocabulary of the multi-label classifier: position i of a
# prediction vector corresponds to classes[i].
classes = [
    '.net', 'android', 'asp.net', 'c', 'c#', 'c++', 'css', 'html',
    'ios', 'iphone', 'java', 'javascript', 'jquery', 'json', 'linux',
    'mysql', 'node.js', 'objective-c', 'performance', 'php', 'python',
    'reactjs', 'ruby-on-rails', 'spring', 'sql', 'sql-server', 'swift',
    'unit-testing', 'windows', 'xcode',
]

# Pre-trained artifacts produced offline at training time.
pa_classifier = joblib.load("passive_aggressive_classifier.pkl")
tfidf_vectorizer = joblib.load("tfidf_vectorizer.pkl")
def run(text: str):
    """Predict tags for a piece of text.

    The raw input is cleaned, vectorized with the fitted TF-IDF
    vectorizer, then fed to the multi-label classifier.  Returns the
    predicted tag names joined by spaces, or a fallback message when
    the classifier predicts no tag at all.
    """
    cleaned = clean_input_string(text)
    prediction = pa_classifier.predict(get_tf_idf_vectors([cleaned]))[0]
    # prediction is a 0/1 indicator vector aligned with `classes`.
    tags = [classes[idx] for idx, flag in enumerate(prediction) if flag == 1]
    if not tags:
        return "Pas de thème retrouvé."
    return " ".join(tags)
def get_tf_idf_vectors(x_as_string):
    """Transform an iterable of documents into TF-IDF feature vectors."""
    vectors = tfidf_vectorizer.transform(x_as_string)
    return vectors
def clean_input_string(text: str) -> str:
    """Normalize raw user text for the TF-IDF vectorizer.

    Pipeline: strip escape sequences, drop HTML markup, keep only
    lowercased alphabetic tokens, remove English stopwords, then
    lemmatize.  The surviving tokens are joined by single spaces.
    """
    stripped = remove_html_tags(remove_escape_sequences(text))
    tokens = remove_punctuation(stripped)
    tokens = lemmatize(remove_stopwords(tokens))
    return " ".join(tokens)
def remove_escape_sequences(text: str):
    """Strip escape sequences, backslashes and quote characters from text.

    The text is first escape-encoded so real control characters become
    literal two-character sequences, which are then removed one target
    at a time.  NOTE(review): non-ASCII characters are also
    escape-encoded here (e.g. 'é' -> '\\xe9') and only the backslash is
    stripped, leaving 'xe9' in the output — confirm this is intended.
    """
    cleaned = text.encode('unicode-escape').decode('utf-8')
    # Order matters: the two-character sequences must be removed before
    # the lone-backslash pass.
    for target in ('\\n', '\\t', '\\r', '\\\\', '\\\'', '\"'):
        cleaned = cleaned.replace(target, '')
    return cleaned
def remove_html_tags(text: str):
    """Return the visible text of *text* with all HTML markup removed."""
    return BeautifulSoup(text, 'html.parser').get_text()
def remove_punctuation(text: str):
    """Lowercase *text* and return its alphabetic tokens as a list.

    Every maximal run of ASCII letters is one token; digits,
    punctuation and whitespace are discarded.  This is exactly what
    nltk.RegexpTokenizer(r'[a-zA-Z]+') produced, but uses the stdlib
    and avoids rebuilding a tokenizer object on every call.
    """
    import re  # local import so this function stays self-contained
    return re.findall(r'[a-zA-Z]+', text.lower())
def remove_stopwords(words_list: list):
    """Return *words_list* without English stopwords, order preserved.

    The stopword list is materialized as a set for O(1) membership
    tests (the original built it via an empty set plus
    ``update(tuple(...))``; a direct ``set(...)`` call is equivalent
    and idiomatic).
    """
    stopwords = set(nltk.corpus.stopwords.words('english'))
    return [word for word in words_list if word not in stopwords]
def lemmatize(words_list: list):
    """Lemmatize every word in *words_list* with WordNet (noun default)."""
    wnl = nltk.WordNetLemmatizer()
    lemmas = []
    for word in words_list:
        lemmas.append(wnl.lemmatize(word))
    return lemmas
def greet(name, intensity):
    """Return a greeting for *name* with int(intensity) exclamation marks.

    NOTE(review): appears unused — the Gradio interface below is wired
    to ``run`` — presumably a leftover from the Gradio quickstart.
    """
    bangs = "!" * int(intensity)
    return "Hello, " + name + bangs
# Expose the tag predictor as a simple text-in / text-out Gradio UI.
demo = gr.Interface(
    fn=run,
    inputs=["text"],
    outputs=["text"],
)
demo.launch()