Spaces:

Bur3hani
/

p4T

Sleeping

App Files Files Community

p4T / app.py

Bur3hani

Create app.py

651ceb4 verified 7 months ago

raw

history blame contribute delete

3.48 kB

	import gradio as gr
	import joblib
	import re
	import nltk
	from nltk.corpus import stopwords
	from nltk.stem import WordNetLemmatizer
	from huggingface_hub import hf_hub_download

	# --- 1. Fetch the fitted vectorizer and classifiers from the Hub repository ---
	# All serialized artifacts used by this Space are pulled from this repo at startup.
	repo_id = "Bur3hani/Personality4rmText"


	def _load_hub_artifact(filename):
	    """Download ``filename`` from ``repo_id`` and deserialize it with joblib."""
	    # NOTE(review): joblib.load unpickles the downloaded file, which can run
	    # arbitrary code; this is only safe while the repository stays trusted.
	    local_path = hf_hub_download(repo_id=repo_id, filename=filename)
	    return joblib.load(local_path)


	print("Downloading assets from the Hub...")
	vectorizer = _load_hub_artifact("mbti_vectorizer.joblib")
	model_ie = _load_hub_artifact("mbti_model_ie.joblib")
	model_ns = _load_hub_artifact("mbti_model_ns.joblib")
	model_ft = _load_hub_artifact("mbti_model_ft.joblib")
	model_jp = _load_hub_artifact("mbti_model_jp.joblib")
	print("Assets downloaded successfully.")

	# NLTK corpora required by the text-cleaning step.
	nltk.download('stopwords')
	nltk.download('wordnet')
	stop_words = set(stopwords.words('english'))
	lemmatizer = WordNetLemmatizer()

	# --- 2. Define the Text Cleaning and Prediction Functions ---
	def clean_text(text):
	    """Normalize raw text into the space-joined token string given to the vectorizer.

	    Steps, in order: remove URL-like tokens, replace the literal ``|||``
	    separator with a space, drop every character that is not an ASCII letter
	    or whitespace, lowercase, discard stop words, and lemmatize what remains.

	    NOTE(review): this function is meant to mirror the cleaning used at
	    training time. The two patterns below were repaired from an over-escaped
	    form (``\\|`` matched a literal pipe, and ``\\\\|...|`` matched the empty
	    string, which split every word into single letters). Confirm they match
	    the training script.

	    Args:
	        text: Raw input string. ``None`` or an empty string yields ``""``.

	    Returns:
	        The cleaned tokens joined by single spaces; ``""`` when nothing survives.
	    """
	    if not text:
	        return ""
	    # Unescaped `|` is regex alternation: strip tokens starting with http/www.
	    text = re.sub(r'http\S+|www\S+|https\S+', '', text, flags=re.MULTILINE)
	    # Escaped pipes match the literal "|||" delimiter (presumably the post
	    # separator in the training data) so adjacent posts do not fuse together.
	    text = re.sub(r'\|\|\|', ' ', text)
	    # Keep only ASCII letters and whitespace, then normalize case.
	    text = re.sub(r'[^a-zA-Z\s]', '', text)
	    text = text.lower()
	    # Stop words are filtered on the raw token, before lemmatization.
	    words = [lemmatizer.lemmatize(word) for word in text.split() if word not in stop_words]
	    return " ".join(words)

	def predict_mbti(text):
	    """Predict a four-letter MBTI type string for ``text``.

	    The text is cleaned with ``clean_text``, vectorized with the module-level
	    ``vectorizer``, and passed to the four per-dimension models. For each
	    dimension a predicted class of 0 selects the first letter of the pair
	    (E, S, T, J); any other class selects the second (I, N, F, P).

	    Args:
	        text: Raw user text from the UI. ``None`` is treated as empty.

	    Returns:
	        The predicted type (e.g. ``"INTJ"``), or a short prompt message when
	        the input contains no usable words after cleaning.
	    """
	    print(f"Received text: {text}")

	    # 1. Clean the input; `or ""` guards against a None value from the UI/API.
	    cleaned_text = clean_text(text or "")
	    if not cleaned_text:
	        # With no tokens left the vectorizer has nothing to work with, so any
	        # type reported would not reflect the input. Ask for real text instead.
	        return "Please enter some text containing words to analyze."
	    vectorized_text = vectorizer.transform([cleaned_text])

	    # 2. One entry per dimension: (model, letter for class 0, letter otherwise).
	    dimensions = (
	        (model_ie, "E", "I"),
	        (model_ns, "S", "N"),
	        (model_ft, "T", "F"),
	        (model_jp, "J", "P"),
	    )

	    # 3. Assemble the type string in I/E, N/S, F/T, J/P order.
	    mbti_type = "".join(
	        zero_letter if model.predict(vectorized_text)[0] == 0 else other_letter
	        for model, zero_letter, other_letter in dimensions
	    )

	    print(f"Predicted Type: {mbti_type}")
	    return mbti_type

	# --- 3. Build the Gradio UI and start serving ---
	title = "MBTI Personality Predictor from Text"
	description = """
	Enter a block of text (e.g., from a blog post, email, or social media) and this app will predict the author's MBTI personality type.
	<br>This app uses a TF-IDF Vectorizer and four Logistic Regression models hosted on Hugging Face.
	<br><b>Disclaimer:</b> This is an educational AI demonstration and is not a clinical diagnostic tool.
	"""

	# Sample inputs offered as one-click examples in the UI.
	example1 = "I think planning my week out in advance is the best way to feel secure and get things done. I love debating ideas with friends and thinking about future possibilities rather than just focusing on the present."
	example2 = "This is all just a pragmatic process. We analyze the data, find the most logical solution, and implement it efficiently. Feelings don't factor into the equation. Let's just get it done."

	# Components are created up front so the Interface call reads as plain wiring.
	text_input = gr.Textbox(lines=8, label="Your Text", placeholder="Enter your text here...")
	type_output = gr.Textbox(label="Predicted MBTI Type")

	iface = gr.Interface(
	    fn=predict_mbti,
	    inputs=text_input,
	    outputs=type_output,
	    title=title,
	    description=description,
	    examples=[example1, example2],
	    theme=gr.themes.Soft(),
	)

	# Start the web server for the app.
	iface.launch()