Spaces:
Sleeping
Sleeping
| import streamlit as st | |
| import pandas as pd | |
| from sklearn.model_selection import train_test_split | |
| from sklearn.linear_model import LogisticRegression | |
| from sklearn.feature_extraction.text import TfidfVectorizer | |
| from sklearn.metrics import accuracy_score, classification_report | |
| from sklearn.naive_bayes import MultinomialNB | |
| from sklearn.ensemble import RandomForestClassifier | |
| from sklearn.svm import SVC | |
| import pickle | |
| import matplotlib.pyplot as plt | |
st.title(":blue[IMDB Dataset of 50k reviews]")


@st.cache_data
def load_data():
    """Load the IMDB reviews CSV into a DataFrame.

    Reads ``IMDB Dataset.csv`` relative to the working directory.

    Streamlit re-executes this script from the top on every widget
    interaction, so the read is wrapped in ``st.cache_data``: the file is
    parsed once and later reruns are served from the cache. ``st.cache_data``
    hands back a fresh copy on each call, so callers may mutate the returned
    frame without corrupting the cached one.

    Returns:
        pandas.DataFrame: the parsed CSV contents.
    """
    return pd.read_csv('IMDB Dataset.csv')
# Seed the per-session slots used further down the script. Each slot is only
# created when absent, so values stored by an earlier run of this script in
# the same session are kept. Factories (rather than shared literals) give
# every slot its own fresh dict.
for _slot, _make_default in (
    ('models', dict),            # model name -> fitted estimator
    ('vectorizer', lambda: None),  # TF-IDF vectorizer, set once models are trained
    ('accuracy', dict),          # model name -> accuracy score
    ('report', dict),            # model name -> classification report
):
    if _slot not in st.session_state:
        st.session_state[_slot] = _make_default()
# Dataset
st.header("Dataset")
df = load_data()
with st.expander("Show Data"):
    # Shown before the label mapping below, so the table still displays the
    # original 'positive' / 'negative' strings.
    st.write(df)

# Encode the sentiment labels as integers (positive -> 1, negative -> 0).
df['sentiment'] = df['sentiment'].map({'positive': 1, 'negative': 0})
X = df['review']
y = df['sentiment']

# Fixed random_state keeps the 80/20 split identical across reruns, so the
# test split always lines up with the models kept in st.session_state.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=41
)

# NOTE: no TF-IDF fitting happens here. The features are produced by
# st.session_state.vectorizer in the training step; a second vectorizer
# fitted on every rerun was pure overhead and its output was never used.
# Train only when this session has no fitted models yet; later reruns of the
# script reuse whatever is already stored in st.session_state.
if not st.session_state.models:
    vectorizer = TfidfVectorizer()
    X_train_tfidf = vectorizer.fit_transform(X_train)
    # The test features do not depend on the model, so build them once
    # instead of once per loop iteration.
    X_test_tfidf = vectorizer.transform(X_test)

    # models
    models = {
        # "SVM": SVC(kernel='linear'),
        "Logistic Regression": LogisticRegression(max_iter=1000),
        "Naive Bayes": MultinomialNB(),
    }

    accuracy = {}
    report = {}
    for name, model in models.items():
        model.fit(X_train_tfidf, y_train)
        y_pred = model.predict(X_test_tfidf)
        accuracy[name] = accuracy_score(y_test, y_pred)
        # Default output: a preformatted text table (a str).
        report[name] = classification_report(y_test, y_pred)

    # Publish everything in one go, and `models` last: the guard above keys
    # on `models`, so a run interrupted part-way through training leaves it
    # empty and the next run retrains instead of keeping a partial set.
    st.session_state.vectorizer = vectorizer
    st.session_state.accuracy = accuracy
    st.session_state.report = report
    st.session_state.models = models
# Accuracy comparison chart (skipped while no scores are stored).
if st.session_state.accuracy:
    # Use an explicit Figure/Axes rather than pyplot's global "current
    # figure": st.pyplot is documented to take a figure object, and closing
    # it afterwards stops figures piling up across reruns.
    fig, ax = plt.subplots(figsize=(10, 5))
    ax.bar(
        list(st.session_state.accuracy.keys()),
        list(st.session_state.accuracy.values()),
        color=['blue', 'orange', 'green'],
    )
    ax.set_ylabel('Accuracy')
    ax.set_title('Model Accuracy Comparison')
    st.pyplot(fig)
    plt.close(fig)

# Per-model classification reports.
for name, report_text in st.session_state.report.items():
    st.write(f"### Classification Report for {name}:")
    # classification_report() returns a preformatted text table by default,
    # not tabular data, so render it as fixed-width text instead of passing
    # a string to st.dataframe.
    st.text(report_text)
st.header("Manual Tryouts", divider='orange')

# Input text from the user
user_input = st.text_area("Enter your Review", "")

if st.button("Predict"):
    # strip() so a box containing only spaces/newlines counts as empty
    # instead of being sent to the models.
    if user_input.strip():
        # Vectorize once with the same vectorizer the models were trained on.
        user_input_tfidf = st.session_state.vectorizer.transform([user_input])

        # Predict with every stored model; label 1 means positive.
        predictions = {
            name: "Positive" if model.predict(user_input_tfidf)[0] == 1 else "Negative"
            for name, model in st.session_state.models.items()
        }

        # Display predictions for each model
        st.write("Predicted Sentiment:")
        for name, sentiment in predictions.items():
            st.write(f"{name}: **{sentiment}**")
    else:
        st.write("Please enter a review.")
| # # Logistic Regression | |
| # st.header('Logistic Regression',divider='orange') | |
| # model = LogisticRegression() | |
| # model.fit(X_train_tfidf, y_train) | |
| # y_pred = model.predict(X_test_tfidf) | |
| # print("Accuracy:", accuracy_score(y_test, y_pred)) | |
| # print(classification_report(y_test, y_pred)) | |
| # filename = 'linear_regression_model.pkl' | |
| # with open(filename, 'wb') as model_file: | |
| # pickle.dump(model, model_file) | |
| # st.write("Accuracy:", accuracy_score(y_test, y_pred)) | |
| # st.markdown(body=classification_report(y_test, y_pred),unsafe_allow_html=True) | |
| # # Naive Bayes | |
| # st.header("Naive Bayes",divider='orange') | |
| # model_nb = MultinomialNB() | |
| # model_nb.fit(X_train_tfidf, y_train) | |
| # # Evaluate the model | |
| # y_pred = model_nb.predict(X_test_tfidf) | |
| # st.write("Accuracy:", accuracy_score(y_test, y_pred)) | |
| # st.markdown(body=classification_report(y_test, y_pred),unsafe_allow_html=True) | |
| # # SVM | |
| # st.header("Support Vector Machine") | |
| # st.caption("Kernel type is linear.") | |
| # model = SVC(kernel='linear') # You can also try 'rbf', 'poly', etc. | |
| # model.fit(X_train_tfidf, y_train) | |
| # y_pred = model.predict(X_test_tfidf) | |
| # st.write("Accuracy:", accuracy_score(y_test, y_pred)) | |
| # st.markdown(body=classification_report(y_test, y_pred),unsafe_allow_html=True) | |