Spaces:
Runtime error
Runtime error
| import gradio as gr | |
| # For loading files | |
| from joblib import dump, load | |
| # Model hub | |
| import tensorflow_hub as hub | |
| # Language/text | |
| import spacy | |
| from bs4 import BeautifulSoup | |
| from spacy.symbols import ORTH | |
| # for listing tags from binary sequence | |
| from itertools import compress | |
# ------------------------------------------
# Load the pre-trained artifacts shipped with the app.
path = './trained_models/'
filename_model = 'multinomialNB-use.joblib'
filename_scaler = 'scaler.joblib'
clf = load(path + filename_model)      # multinomial NB tag classifier
scaler = load(path + filename_scaler)  # scaler fitted on USE embeddings
# Decision threshold applied to the predicted per-tag probabilities.
thresh = 0.4
# The 38 tags the classifier can emit, in the column order of its output.
tag_list = [
    'c#', 'java', 'javascript', 'python', 'c++', 'ios', 'android',
    '.net', 'html', 'php', 'objective-c', 'jquery', 'c', 'iphone',
    'sql', 'asp.net', 'css', 'linux', 'node.js', 'performance',
    'spring', 'windows', 'swift', 'xcode', 'ruby-on-rails', 'mysql',
    'json', 'sql-server', 'multithreading', 'asp.net-mvc', 'ruby',
    'database', 'wpf', 'unit-testing', 'macos', 'arrays', 'c++11',
    'django',
]
# Instantiate the English spaCy language model.
# FIX: the original loaded the model twice (spacy.load followed by a
# redundant `import en_core_web_sm; en_core_web_sm.load()`), discarding
# the first pipeline; one load is sufficient.
nlp = spacy.load("en_core_web_sm")
# Importing custom stopwords (one per line); explicit encoding so the
# result does not depend on the platform default.
with open('./stopwords/stopwords.txt', encoding='utf-8') as file:
    my_stopwords = {line.rstrip() for line in file}
# Adding my_stopwords to spacy's default stopwords
nlp.Defaults.stop_words = nlp.Defaults.stop_words.union(my_stopwords)
# Import and instantiate the Universal Sentence Encoder embedding model
embed = hub.load("https://tfhub.dev/google/universal-sentence-encoder/4")
| # Function definitions | |
def remove_code(text):
    """
    Return *text* with every ``<code>`` and ``<script>`` element
    replaced by a single space.

    Parameters
    ----------
    text : str
        Raw HTML of a question body.

    Returns
    -------
    str
        The document serialized back to a string (the 'lxml' parser
        may add <html>/<body> wrappers; the caller strips all tags
        afterwards with get_text(), so this is harmless).
    """
    soup = BeautifulSoup(text, 'lxml')
    # FIX: single pass with the modern find_all() spelling and a list
    # of tag names, instead of two passes with the deprecated findAll().
    for element in soup.find_all(['code', 'script']):
        element.replace_with(' ')
    return str(soup)
def clean(text, tokenize=False, strict=False, **kwargs):
    """
    Clean and lemmatize a raw question text.

    Strips code/script sections and HTML tags, removes newlines,
    non-ASCII characters and digits, normalizes spellings such as
    'c ++' / 'C #' to the canonical tag names, then tokenizes with
    spaCy, omitting stopwords, punctuation and whitespace tokens.

    Parameters
    ----------
    text : str
        Raw (HTML) question text.
    tokenize : bool
        If True return the list of lemmatized tokens,
        if False return the tokens joined into a single string.
    strict : bool
        If True additionally keep only nouns, proper nouns and verbs.
    **kwargs
        If an 'ent' key is present, the result is wrapped in a dict
        {'output': ..., 'ents': doc.ents} exposing named entities.

    Returns
    -------
    str | list | dict
        Cleaned text, token list, or the dict described above.
    """
    # Removing <code>...</code> and <script>...</script>
    clean_txt = remove_code(text)
    # Removing the remaining HTML tags
    soup = BeautifulSoup(clean_txt, features='html.parser')
    clean_txt = soup.get_text()
    # Removing new line character: \n
    clean_txt = clean_txt.replace('\n', ' ')
    # Removing unicode (non-ASCII) characters
    clean_txt = clean_txt.encode("ascii", "ignore").decode()
    # Removing digits
    clean_txt = ''.join(char for char in clean_txt if not char.isdigit())
    # Normalizing 'c ++' / 'c #' variants to 'c++' / 'c#'.
    # FIX: the original repeated the 'C ++' replacement twice; the
    # duplicate was a no-op and is dropped. Order is preserved.
    for old, new in (('c ++', 'c++'), ('c #', 'c#'),
                     ('C ++', 'c++'), ('C #', 'c#'),
                     ('C#', 'c#')):
        clean_txt = clean_txt.replace(old, new)
    # Special-case rules so multi-character tag names survive
    # tokenization as single tokens (idempotent, so safe per call).
    for term in ("c#", ".net", "objective-c", "asp.net",
                 "node.js", "ruby-on-rails", "sql-server", "unit-testing"):
        nlp.tokenizer.add_special_case(term, [{ORTH: term}])
    # Tokenize with spacy
    doc = nlp(clean_txt)

    def _keep(token):
        # Always drop stopwords, punctuation and whitespace tokens.
        if token.is_stop or token.is_punct or token.is_space:
            return False
        # In strict mode additionally require noun / proper noun / verb.
        return not strict or token.pos_ in ('NOUN', 'PROPN', 'VERB')

    tokens = [token.lemma_.lower() for token in doc if _keep(token)]
    # Return either the token list or the re-joined text.
    result = tokens if tokenize else ' '.join(tokens)
    # Option for list of entities in output
    if 'ent' in kwargs:
        result = {'output': result, 'ents': doc.ents}
    return result
def my_pred(X):
    """
    Predict a binary tag vector for a USE embedding X.

    The embedding is first transformed with the pre-fitted scaler,
    then run through the classifier; per-tag probabilities are
    thresholded at `thresh` into 0/1 values.

    Returns a flat binary array of length len(tag_list).
    """
    scaled = scaler.transform(X)
    proba = clf.predict_proba(scaled)
    # One 0/1 entry per tag, flattened to shape (n_tags,).
    return (proba > thresh).astype(int).reshape((len(tag_list),))
def binary_to_tag_list(binary):
    """
    Convert a binary indicator sequence into the names of the
    selected tags, preserving tag_list order.
    """
    return [tag for tag, flag in zip(tag_list, binary) if flag]
def tag_suggestion(raw_text):
    """
    Suggest Stack Overflow tags for the question text *raw_text*.

    Pipeline: clean the raw text, embed the cleaned document with the
    Universal Sentence Encoder, classify the embedding, and translate
    the binary prediction into tag names.
    """
    embedding = embed([clean(raw_text)])
    prediction = my_pred(embedding)
    return binary_to_tag_list(prediction)
# --------------------------------------------------
# Example question bodies (raw Stack Overflow HTML) pre-filled in the UI.
examples = [
    ["Jquery/Javascript Opacity animation with scroll <p>I'm looking to change the opacity on an object (and have the transition be animated) based on a users scroll.\nexample(http://davegamache.com/)</p>\n\n<p>I've searched everywhere\nlike here, but it ends up pointing me to the waypoints plugin (http://stackoverflow.com/questions/6316757/opacity-based-on-scroll-position)</p>\n\n<p>I've implemented the [waypoints][1] plugin and have the object fading once it's higher than 100px. [Using the offet attribute] but would like to basically control the opacity of an object and have the animation be visible like the above example.</p>\n\n<p>I've searched all over- this is my last resort.\nAny help is greatly appreciated.</p>\n"],
    ['Setting cross-domain cookies in Safari <p>I have to call domain A.com (which sets the cookies with http) from domain B.com.\nAll I do on domain B.com is (javascript): </p>\n\n<pre><code>var head = document.getElementsByTagName("head")[0];\nvar script = document.createElement("script");\nscript.src = "A.com/setCookie?cache=1231213123";\nhead.appendChild(script);\n</code></pre>\n\n<p>This sets the cookie on A.com on every browser I\'ve tested, except Safari.\nAmazingly this works in IE6, even without the P3P headers.</p>\n\n<p>Is there any way to make this work in Safari?</p>\n'],
    ['Database migrations for SQL Server <p>I need a database migration framework for SQL Server, capable of managing both schema changes and data migrations.</p>\n\n<p>I guess I am looking for something similar to django\'s <a href="http://south.aeracode.org/" rel="noreferrer">South</a> framework here.</p>\n\n<p>Given the fact that South is tightly coupled with django\'s ORM, and the fact that there\'s so many ORMs for SQL Server I guess having just a generic migration framework, enabling you to write and execute in controlled and sequential manner SQL data/schema change scripts should be sufficient.</p>\n'],
]
# Gradio interface: free-text question in, suggested tag list out.
demo = gr.Interface(fn=tag_suggestion,
                    inputs="text",
                    outputs=["text"],
                    examples=examples)
# Launch the web app only when run as a script (not when imported).
if __name__ == "__main__":
    demo.launch()