"""Train a bag-of-words text classifier and save the fitted pipeline.

The script reads ``dataset.csv``, takes the ``text`` column as input and the
``label`` column as target, holds out 20% of the rows for evaluation, fits a
``CountVectorizer`` + ``LogisticRegression`` pipeline on the remaining rows,
prints the score obtained on the held-out rows and serializes the fitted
pipeline with joblib.
"""

import os

import joblib
import pandas as pd
from sklearn.feature_extraction.text import CountVectorizer
from sklearn.linear_model import LogisticRegression
from sklearn.model_selection import train_test_split
from sklearn.pipeline import Pipeline

# Input and output locations, relative to the current working directory.
DATASET_PATH = "dataset.csv"
MODEL_DIR = "model"
# Built with a literal "/" (not os.path.join) so the path that is written and
# the path that is printed stay identical on every platform.
MODEL_PATH = MODEL_DIR + "/modelo_opiniones_peliculas.pkl"

# Load the labelled examples: raw text as features, "label" as the target.
df = pd.read_csv(DATASET_PATH)
X = df["text"]
y = df["label"]

# Hold out 20% of the rows for evaluation; the fixed seed makes the split
# reproducible across runs.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=42
)

# Token counts feed a logistic regression. Keeping both steps in a single
# Pipeline means the saved artifact accepts raw strings at prediction time.
model = Pipeline([
    ("vectorizer", CountVectorizer()),
    ("classifier", LogisticRegression(max_iter=300)),
])
model.fit(X_train, y_train)

# Pipeline.score delegates to the final estimator; for a classifier this is
# the mean accuracy on the given data.
accuracy = model.score(X_test, y_test)
print(f"Precisión en el conjunto de prueba: {accuracy:.2f}")

# Persist the fitted pipeline, creating the output directory if necessary.
os.makedirs(MODEL_DIR, exist_ok=True)
joblib.dump(model, MODEL_PATH)
print(f"Modelo guardado en {MODEL_PATH}")