# Source: modelo_opiniones_peliculas / train_model.py
# Uploaded by IABD07 ("Upload train_model.py", commit ad327a4, verified)
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.feature_extraction.text import CountVectorizer
from sklearn.linear_model import LogisticRegression
from sklearn.pipeline import Pipeline
import joblib
import os
# Train a bag-of-words + logistic-regression text classifier from
# "dataset.csv" and persist the fitted pipeline with joblib.

# 1. Load the dataset. The script reads the "text" column as the input
#    documents and the "label" column as the target.
df = pd.read_csv("dataset.csv")

# Drop rows with a missing text or label: pandas reads empty CSV cells as
# NaN, and CountVectorizer rejects NaN documents (as does the classifier for
# NaN targets), so a single blank cell would otherwise abort training.
df = df.dropna(subset=["text", "label"])

# 2. Separate features and target. Cast to str (after dropping NaN, so that
#    missing values do not become the literal "nan") so that texts pandas
#    parsed as numbers can still be lower-cased and tokenized.
X = df["text"].astype(str)
y = df["label"]

# 3. Split into training and test sets (80/20); the fixed seed keeps the
#    split reproducible between runs.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=42
)

# 4. Build the processing + model pipeline: raw text -> token counts ->
#    logistic regression. Keeping both steps in one Pipeline means the saved
#    artifact accepts raw strings at prediction time.
model = Pipeline([
    ('vectorizer', CountVectorizer()),
    ('classifier', LogisticRegression(max_iter=300)),
])

# 5. Fit the model on the training split only.
model.fit(X_train, y_train)

# 6. Evaluate on the held-out split. Pipeline.score delegates to the final
#    estimator, which for a classifier reports mean accuracy.
accuracy = model.score(X_test, y_test)
print(f"Precisión en el conjunto de prueba: {accuracy:.2f}")

# 7. Persist the fitted pipeline, creating the output directory if needed.
os.makedirs("model", exist_ok=True)
joblib.dump(model, "model/modelo_opiniones_peliculas.pkl")
print("Modelo guardado en model/modelo_opiniones_peliculas.pkl")