File size: 3,475 Bytes
651ceb4
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
import gradio as gr
import joblib
import re
import nltk
from nltk.corpus import stopwords
from nltk.stem import WordNetLemmatizer
from huggingface_hub import hf_hub_download

# --- 1. Load models and vectorizer from YOUR Hub Repository ---
# This is the critical part that connects your Space to your model repo.
repo_id = "Bur3hani/Personality4rmText" 

print("Downloading assets from the Hub...")

def _fetch_joblib(filename):
    # Pull one serialized asset from the Hub repo and deserialize it.
    return joblib.load(hf_hub_download(repo_id=repo_id, filename=filename))

# TF-IDF vectorizer plus one binary classifier per MBTI axis.
vectorizer = _fetch_joblib("mbti_vectorizer.joblib")
model_ie = _fetch_joblib("mbti_model_ie.joblib")
model_ns = _fetch_joblib("mbti_model_ns.joblib")
model_ft = _fetch_joblib("mbti_model_ft.joblib")
model_jp = _fetch_joblib("mbti_model_jp.joblib")
print("Assets downloaded successfully.")

# Download NLTK data needed for text cleaning
nltk.download('stopwords')
nltk.download('wordnet')
lemmatizer = WordNetLemmatizer()
stop_words = set(stopwords.words('english'))

# --- 2. Define the Text Cleaning and Prediction Functions ---
def clean_text(text):
    """Normalize raw text exactly the way it was normalized for training.

    Steps: strip URLs, replace the '|||' post separators with spaces,
    drop every non-letter character, lowercase, remove English
    stopwords, and lemmatize the remaining words.
    """
    # This function must be identical to the one used during training
    no_urls = re.sub(r'http\S+|www\S+|https\S+', '', text, flags=re.MULTILINE)
    no_separators = re.sub(r'\|\|\|', ' ', no_urls)
    letters_only = re.sub(r'[^a-zA-Z\s]', '', no_separators).lower()
    kept = (
        lemmatizer.lemmatize(word)
        for word in letters_only.split()
        if word not in stop_words
    )
    return " ".join(kept)

def predict_mbti(text):
    """Predict the four-letter MBTI type for a block of raw text.

    Cleans and vectorizes the input, runs the four per-axis binary
    classifiers, and maps each 0/1 label to its letter (0 -> E/S/T/J,
    1 -> I/N/F/P).
    """
    print(f"Received text: {text}")
    # 1. Clean and vectorize the input text
    features = vectorizer.transform([clean_text(text)])

    # 2. Predict each dimension and 3. assemble the final type string
    letters = [
        "E" if model_ie.predict(features)[0] == 0 else "I",
        "S" if model_ns.predict(features)[0] == 0 else "N",
        "T" if model_ft.predict(features)[0] == 0 else "F",
        "J" if model_jp.predict(features)[0] == 0 else "P",
    ]
    mbti_type = "".join(letters)

    print(f"Predicted Type: {mbti_type}")
    return mbti_type

# --- 3. Create and Launch the Gradio Interface ---
title = "MBTI Personality Predictor from Text"
description = """
Enter a block of text (e.g., from a blog post, email, or social media) and this app will predict the author's MBTI personality type.
<br>This app uses a TF-IDF Vectorizer and four Logistic Regression models hosted on Hugging Face.
<br><b>Disclaimer:</b> This is an educational AI demonstration and is not a clinical diagnostic tool.
"""
example1 = "I think planning my week out in advance is the best way to feel secure and get things done. I love debating ideas with friends and thinking about future possibilities rather than just focusing on the present."
example2 = "This is all just a pragmatic process. We analyze the data, find the most logical solution, and implement it efficiently. Feelings don't factor into the equation. Let's just get it done."

# Build the UI components up front, then wire them into the Interface.
text_input = gr.Textbox(lines=8, label="Your Text", placeholder="Enter your text here...")
type_output = gr.Textbox(label="Predicted MBTI Type")

iface = gr.Interface(
    fn=predict_mbti,
    inputs=text_input,
    outputs=type_output,
    title=title,
    description=description,
    examples=[example1, example2],
    theme=gr.themes.Soft(),
)

iface.launch()