p4T / app.py
Bur3hani's picture
Create app.py
651ceb4 verified
import gradio as gr
import joblib
import re
import nltk
from nltk.corpus import stopwords
from nltk.stem import WordNetLemmatizer
from huggingface_hub import hf_hub_download
# --- 1. Load models and vectorizer from YOUR Hub Repository ---
# The Space holds no model files itself: the fitted vectorizer and the four
# per-dimension models are fetched from the model repository named here.
repo_id = "Bur3hani/Personality4rmText"


def _load_from_hub(filename):
    """Download *filename* from ``repo_id`` and deserialize it with joblib.

    NOTE: joblib.load unpickles the file, so ``repo_id`` must point at a
    repository whose contents are trusted.
    """
    local_path = hf_hub_download(repo_id=repo_id, filename=filename)
    return joblib.load(local_path)


print("Downloading assets from the Hub...")
vectorizer = _load_from_hub("mbti_vectorizer.joblib")
model_ie = _load_from_hub("mbti_model_ie.joblib")
model_ns = _load_from_hub("mbti_model_ns.joblib")
model_ft = _load_from_hub("mbti_model_ft.joblib")
model_jp = _load_from_hub("mbti_model_jp.joblib")
print("Assets downloaded successfully.")

# NLTK corpora used by the text-cleaning step (stopword list + WordNet data)
for _corpus in ('stopwords', 'wordnet'):
    nltk.download(_corpus)

stop_words = set(stopwords.words('english'))
lemmatizer = WordNetLemmatizer()
# --- 2. Define the Text Cleaning and Prediction Functions ---
def clean_text(text):
    """Normalize raw text into a space-joined string of lemmatized tokens.

    Steps, in order: drop URL-like tokens (anything starting with ``http`` or
    ``www`` up to the next whitespace), turn each ``|||`` separator into a
    space, remove every character that is neither an ASCII letter nor
    whitespace, lowercase, discard tokens found in ``stop_words``, and
    lemmatize what remains with ``lemmatizer``.

    NOTE: per the original author, this must stay identical to the cleaning
    applied at training time, so the patterns below are kept exactly as is.
    """
    without_urls = re.sub(r'http\S+|www\S+|https\S+', '', text, flags=re.MULTILINE)
    without_separators = re.sub(r'\|\|\|', ' ', without_urls)
    letters_only = re.sub(r'[^a-zA-Z\s]', '', without_separators)

    kept = []
    for token in letters_only.lower().split():
        if token in stop_words:
            continue
        kept.append(lemmatizer.lemmatize(token))
    return " ".join(kept)
def predict_mbti(text):
    """Predict a four-letter MBTI type string for a piece of free text.

    The text is cleaned with ``clean_text``, vectorized with the module-level
    ``vectorizer``, and passed to the four per-dimension models. The first
    prediction of each model selects one letter: a value equal to 0 picks the
    first letter of the pair (E, S, T, J), any other value picks the second
    (I, N, F, P).

    Args:
        text: Raw user input. May be ``None`` or blank.

    Returns:
        The four-letter type string, or a short prompt asking for input when
        there is nothing usable to classify.
    """
    no_input_message = "Please enter some text to analyze."

    print(f"Received text: {text}")

    # Blank input would be vectorized to an all-zero row and still produce a
    # type, which says nothing about the author; ask for text instead.
    if text is None or not text.strip():
        return no_input_message

    # 1. Clean and vectorize the input text
    cleaned_text = clean_text(text)
    if not cleaned_text:
        # Input held only URLs, digits, punctuation or stopwords.
        return no_input_message
    vectorized_text = vectorizer.transform([cleaned_text])

    # 2. Predict each dimension and 3. assemble the final type string.
    # NOTE(review): the 0 -> first-letter mapping is assumed to match the
    # label encoding used at training time - confirm against the training code.
    dimensions = (
        (model_ie, "E", "I"),
        (model_ns, "S", "N"),
        (model_ft, "T", "F"),
        (model_jp, "J", "P"),
    )
    letters = []
    for model, letter_if_zero, letter_otherwise in dimensions:
        prediction = model.predict(vectorized_text)[0]
        letters.append(letter_if_zero if prediction == 0 else letter_otherwise)
    mbti_type = "".join(letters)

    print(f"Predicted Type: {mbti_type}")
    return mbti_type
# --- 3. Create and Launch the Gradio Interface ---
# Page heading and the HTML-flavoured blurb shown under it.
title = "MBTI Personality Predictor from Text"
description = """
Enter a block of text (e.g., from a blog post, email, or social media) and this app will predict the author's MBTI personality type.
<br>This app uses a TF-IDF Vectorizer and four Logistic Regression models hosted on Hugging Face.
<br><b>Disclaimer:</b> This is an educational AI demonstration and is not a clinical diagnostic tool.
"""

# Sample inputs offered to the user as one-click examples.
example1 = "I think planning my week out in advance is the best way to feel secure and get things done. I love debating ideas with friends and thinking about future possibilities rather than just focusing on the present."
example2 = "This is all just a pragmatic process. We analyze the data, find the most logical solution, and implement it efficiently. Feelings don't factor into the equation. Let's just get it done."
_examples = [example1, example2]

# One multi-line text box in, one text box out.
_text_input = gr.Textbox(lines=8, label="Your Text", placeholder="Enter your text here...")
_type_output = gr.Textbox(label="Predicted MBTI Type")

iface = gr.Interface(
    fn=predict_mbti,
    inputs=_text_input,
    outputs=_type_output,
    title=title,
    description=description,
    examples=_examples,
    theme=gr.themes.Soft(),
)

# Start serving the app as soon as the module is executed.
iface.launch()