Spaces:

WizardCoder2007
/

social_media_analyzer

Running

App Files Files Community

social_media_analyzer / src /train_logic_aligned.py

WizardCoder2007

update

bbd259b 14 days ago

raw

history blame contribute delete

3.62 kB

	import numpy as np
	import joblib
	import os
	from sklearn.linear_model import LogisticRegression

	# Destination of the trained classifier: a "models" directory that sits
	# next to this script's own directory (hence the ".." hop).
	_MODELS_DIR = os.path.join(os.path.dirname(__file__), "..", "models")
	MODEL_PATH = os.path.join(_MODELS_DIR, "final_classifier.pkl")

	# Create the directory up front so the final dump cannot fail on a missing
	# folder; exist_ok makes repeated runs harmless.
	os.makedirs(_MODELS_DIR, exist_ok=True)

	print(">>> Generating Synthetic Logic-Aligned Training Data...")

	# Features:
	# 0: sim_pro_india
	# 1: sim_anti_india
	# 2: sim_pro_govt
	# 3: sim_anti_govt
	# 4: sim_neutral
	# 5: neg
	# 6: neu
	# 7: pos
	# 8: sarcasm
	# 9: context_pol_crit (Anti-Govt)
	# 10: context_nat_crit (Anti-India)
	# 11: context_pol_praise (Pro-Govt)
	# 12: context_nat_praise (Pro-India)

	# Number of entries in every feature vector (indices 0..12).
	_NUM_FEATURES = 13

	# Per-class overrides applied on top of the base noise.
	# Maps label index -> ordered tuple of (feature_index, low, high); each listed
	# feature is redrawn uniformly from [low, high). The order inside each tuple
	# is the order in which random draws are made, so it must stay stable for
	# seeded runs to be reproducible.
	_CLASS_FEATURE_RANGES = {
	    # 0: Pro-India
	    0: (
	        (0, 0.6, 1.0),   # high pro-India similarity
	        (7, 0.5, 1.0),   # high positive sentiment
	        (5, 0.0, 0.2),   # low negative sentiment
	        (8, 0.0, 0.2),   # sarcasm kept low so it is ignored
	        (12, 0.7, 1.0),  # LLM context: high national praise
	        (9, 0.0, 0.2),   # LLM context: low political criticism
	    ),
	    # 1: Anti-India
	    1: (
	        (1, 0.6, 1.0),   # high anti-India similarity
	        (5, 0.5, 1.0),   # high negative sentiment
	        (7, 0.0, 0.2),   # low positive sentiment
	        (8, 0.0, 0.2),   # sarcasm kept low so it is ignored
	        (10, 0.7, 1.0),  # LLM context: high national criticism
	        (9, 0.0, 0.3),   # LLM context: low/medium political criticism
	    ),
	    # 2: Pro-Government
	    2: (
	        (2, 0.6, 1.0),   # high pro-government similarity
	        (7, 0.5, 1.0),   # high positive sentiment
	        (5, 0.0, 0.2),   # low negative sentiment
	        (8, 0.0, 0.2),   # sarcasm kept low so it is ignored
	        (11, 0.7, 1.0),  # LLM context: high political praise
	        (10, 0.0, 0.2),  # LLM context: low national criticism
	    ),
	    # 3: Anti-Government
	    3: (
	        (3, 0.6, 1.0),   # high anti-government similarity
	        (5, 0.5, 1.0),   # high negative sentiment
	        (7, 0.0, 0.2),   # low positive sentiment
	        (8, 0.0, 0.2),   # sarcasm kept low so it is ignored
	        (9, 0.7, 1.0),   # LLM context: high political criticism
	        (10, 0.0, 0.4),  # LLM context: low/medium national criticism
	    ),
	    # 4: Neutral
	    4: (
	        (4, 0.5, 1.0),   # high neutral similarity
	        (6, 0.5, 1.0),   # high neutral sentiment
	        (5, 0.0, 0.2),   # low negative sentiment
	        (7, 0.0, 0.2),   # low positive sentiment
	        (8, 0.0, 0.1),   # very low sarcasm
	        (9, 0.0, 0.3),   # LLM context: low political criticism
	        (10, 0.0, 0.3),  # LLM context: low national criticism
	    ),
	}


	def generate_sample(label_idx, rng=None):
	    """Return one synthetic 13-feature vector for the given class label.

	    Every feature starts as uniform noise in [0.0, 0.3); the features that
	    characterise the requested class are then redrawn from the ranges listed
	    in ``_CLASS_FEATURE_RANGES``.

	    Args:
	        label_idx: Class label, one of 0 (Pro-India), 1 (Anti-India),
	            2 (Pro-Government), 3 (Anti-Government), 4 (Neutral).
	        rng: Optional random source exposing ``uniform(low, high, size)``,
	            e.g. ``np.random.default_rng(seed)``. When omitted, NumPy's
	            global ``np.random`` state is used, exactly as before.

	    Returns:
	        A 1-D ``numpy.ndarray`` of 13 floats.

	    Raises:
	        ValueError: If ``label_idx`` is not a known class label. Returning
	            plain noise here would silently inject a mislabelled sample
	            into the training set.
	    """
	    if label_idx not in _CLASS_FEATURE_RANGES:
	        raise ValueError(
	            f"Unknown label_idx {label_idx!r}; "
	            f"expected one of {sorted(_CLASS_FEATURE_RANGES)}"
	        )

	    source = np.random if rng is None else rng

	    # Base noise for all 13 features.
	    feats = source.uniform(0.0, 0.3, _NUM_FEATURES)

	    # Overwrite the class-defining features with their characteristic ranges.
	    for feature_idx, low, high in _CLASS_FEATURE_RANGES[label_idx]:
	        feats[feature_idx] = source.uniform(low, high)

	    return feats

	# Generate data
	# Seed NumPy's global RNG so the synthetic dataset, and therefore the saved
	# model, is the same on every run instead of drifting between retrains.
	RANDOM_SEED = 42
	np.random.seed(RANDOM_SEED)

	NUM_CLASSES = 5  # labels 0..4 understood by generate_sample
	SAMPLES_PER_CLASS = 500

	# Samples are laid out class by class: SAMPLES_PER_CLASS rows of label 0,
	# then label 1, and so on.
	labels = [label for label in range(NUM_CLASSES) for _ in range(SAMPLES_PER_CLASS)]
	X = np.array([generate_sample(label) for label in labels])
	y = np.array(labels)

	print(f"Training Logistic Regression on {len(X)} synthetic samples (13 features)...")
	# `multi_class` is intentionally not passed: it is deprecated since
	# scikit-learn 1.5 and slated for removal, and with the lbfgs solver a
	# multiclass target is already fitted with the multinomial loss by default.
	clf = LogisticRegression(max_iter=1000, solver='lbfgs')
	clf.fit(X, y)

	# NOTE: this is accuracy on the very data the model was fitted on, so it is a
	# sanity check of the synthetic rules rather than a generalisation estimate.
	print(f"Accuracy on Training Set: {clf.score(X, y):.4f}")

	print(f"Saving model to {MODEL_PATH}...")
	joblib.dump(clf, MODEL_PATH)
	print("DONE.")