Spaces:
Build error
Build error
| import gradio as gr | |
| import joblib | |
| import pandas as pd | |
| import re | |
| import nltk | |
| from nltk.stem import PorterStemmer | |
| from nltk.tokenize import word_tokenize | |
| import numpy as np | |
# Fetch the NLTK resources this module needs at import time.
# 'punkt' / 'punkt_tab' back word_tokenize: recent NLTK releases (3.9+) look up
# 'punkt_tab' instead of the older pickled 'punkt' models, so both are fetched
# to keep tokenisation working regardless of the installed NLTK version.
nltk.download('punkt')
nltk.download('punkt_tab')
# English stopword list used when filtering words.
nltk.download('stopwords')
def text_preprocessing(df):
    """
    Normalise the 'Text' column of ``df`` in place; nothing is returned.

    Each document is lower-cased, stripped of punctuation (every character
    that is neither a word character nor whitespace), filtered of NLTK
    English stopwords, tokenised with ``word_tokenize``, Porter-stemmed and
    finally re-joined with single spaces.

    Args:
        df: pandas DataFrame holding the documents in a string column
            named 'Text'. The column is overwritten with the cleaned text.
    """
    # A set gives O(1) membership tests; stopwords.words() returns a list,
    # which would otherwise be scanned linearly for every word.
    stop_words = set(nltk.corpus.stopwords.words('english'))
    porter_stemmer = PorterStemmer()

    def _clean(doc):
        # Remove punctuation
        doc = re.sub(r'[^\w\s]+', '', doc)
        # Remove stopwords. split()/join() already collapses every run of
        # whitespace to one space, so no separate "extra spaces" pass is needed.
        doc = ' '.join(word for word in doc.split() if word not in stop_words)
        # Stemming
        return ' '.join(porter_stemmer.stem(word) for word in word_tokenize(doc))

    # Convert to lower case
    df['Text'] = df['Text'].str.lower()
    # Run the remaining steps in a single pass over the column
    df['Text'] = df['Text'].apply(_clean)
def predict_user_input(paragraph, tfidf, nmf, label_mapping_yp):
    """
    Classify a single paragraph and return its mapped label.

    The paragraph is wrapped in a one-row DataFrame, cleaned with
    ``text_preprocessing``, passed through ``tfidf.transform`` and
    ``nmf.transform``, and the index of the largest NMF component of the row
    is looked up in ``label_mapping_yp``.

    Args:
        paragraph: raw text to classify.
        tfidf: object exposing ``transform`` (applied to the cleaned text).
        nmf: object exposing ``transform`` (applied to the tfidf output).
        label_mapping_yp: mapping indexed by component index, giving the label.

    Returns:
        The label mapped from the strongest component of the paragraph.
    """
    frame = pd.DataFrame({'Text': [paragraph]})
    text_preprocessing(frame)  # mutates frame['Text'] in place

    component_weights = nmf.transform(tfidf.transform(frame['Text']))
    strongest = np.argmax(component_weights, axis=1)

    labels = []
    for component_index in strongest:
        labels.append(label_mapping_yp[component_index])
    return labels[0]
# Deserialised model artefacts, filled on first use so the pickle files are
# read from disk once per process instead of on every request.
_ARTIFACTS = {}


def _load_artifacts():
    """Return (tfidf, nmf, label_mapping), loading the pickles on first call."""
    if not _ARTIFACTS:
        # NOTE: joblib.load unpickles arbitrary objects; these files must only
        # ever come from a trusted source.
        # Build the full dict first so a failed load leaves the cache empty
        # rather than half-populated.
        loaded = {
            'tfidf': joblib.load('tfidf_vectorizer.pkl'),
            'nmf': joblib.load('nmf_model.pkl'),
            'label_mapping': joblib.load('label_mapping.pkl'),
        }
        _ARTIFACTS.update(loaded)
    return _ARTIFACTS['tfidf'], _ARTIFACTS['nmf'], _ARTIFACTS['label_mapping']


def process_paragraph(paragraph):
    """
    Predict the class of ``paragraph`` using the saved model artefacts.

    The artefacts are loaded from 'tfidf_vectorizer.pkl', 'nmf_model.pkl' and
    'label_mapping.pkl' in the current working directory the first time this
    function runs, and reused on later calls.

    Args:
        paragraph: raw text to classify.

    Returns:
        The predicted class, as returned by ``predict_user_input``.
    """
    tfidf, nmf, label_mapping_yp = _load_artifacts()
    predicted_class = predict_user_input(paragraph, tfidf, nmf, label_mapping_yp)
    print(f"The predicted class for the input paragraph is: {predicted_class}")
    return predicted_class
def paragraph_processing_app(paragraph):
    """Gradio callback: return the result of ``process_paragraph`` for the text."""
    return process_paragraph(paragraph)
# Gradio UI: one multi-line input box, one output box, wired to
# paragraph_processing_app and launched as soon as the module is run.
input_text = gr.Textbox(label="Enter a article:", lines=10)
output_text = gr.Textbox(
    label="Category(Out of Business, Tech, Sport, Politics and Entertainment.)"
)
demo = gr.Interface(
    fn=paragraph_processing_app,
    inputs=input_text,
    outputs=output_text,
)
demo.launch(share=True)