Spaces:

amitbhatt6075
/

reachify-ai-service

Running

reachify-ai-service / training /train_matching_model.py

Complete fresh start - FINAL UPLOAD

0914e96 20 days ago

1.35 kB

	import pandas as pd
	from sklearn.ensemble import RandomForestRegressor
	from sklearn.compose import ColumnTransformer
	from sklearn.pipeline import Pipeline
	from sklearn.preprocessing import OneHotEncoder
	import joblib
	import os

	# Training script: fits a RandomForest regression pipeline that predicts
	# `match_score` from the sample data and saves the fitted pipeline to disk.
	print("Training script started...")

	# 1. Load the training data (path is relative to the current working directory).
	df = pd.read_csv('data/sample_data.csv')

	# 2. Separate the features (X) from the target (y).
	X = df.drop('match_score', axis=1)
	y = df['match_score']

	# 3. Build the preprocessing step.
	categorical_features = ['niche', 'country']
	numeric_features = ['followers', 'engagement_rate']

	# Numeric columns are passed through unchanged; categorical columns are
	# one-hot encoded. handle_unknown='ignore' keeps the encoder from raising on
	# categories that were not present in the training data.
	preprocessor = ColumnTransformer(
	    transformers=[
	        ('num', 'passthrough', numeric_features),
	        ('cat', OneHotEncoder(handle_unknown='ignore'), categorical_features),
	    ]
	)

	# 4. Create the model. The fixed random_state makes training reproducible.
	model = RandomForestRegressor(n_estimators=100, random_state=42)

	# 5. Combine preprocessing and the model into a single pipeline so that the
	#    saved artifact accepts the raw feature columns directly.
	pipeline = Pipeline(
	    steps=[
	        ('preprocessor', preprocessor),
	        ('regressor', model),
	    ]
	)

	# 6. Train the pipeline.
	pipeline.fit(X, y)
	print("Model training complete.")

	# 7. Save the trained pipeline.
	model_path = 'models/influencer_matcher_v1.joblib'

	# Create the output directory derived from model_path so the two cannot
	# drift apart. exist_ok=True avoids the check-then-create race of testing
	# os.path.exists() before calling makedirs().
	os.makedirs(os.path.dirname(model_path), exist_ok=True)

	joblib.dump(pipeline, model_path)
	print(f"Model successfully saved to {model_path}")