anggars
committed on
Commit
·
c7e5db4
1
Parent(s):
9dd7ae0
setup sentimind
Browse files- .gitignore +1 -0
- Dockerfile +24 -0
- README.md +22 -7
- api/core/__init__.py +0 -0
- api/core/chatbot.py +44 -0
- api/core/nlp_handler.py +175 -0
- api/core/quiz_logic.py +54 -0
- api/data/model_emotion.pkl +3 -0
- api/data/model_mbti.pkl +3 -0
- api/data/questions.json +58 -0
- api/index.py +97 -0
- api/predict.py +25 -0
- api/quiz.py +26 -0
- api/requirements.txt +11 -0
.gitignore
ADDED
|
@@ -0,0 +1 @@
|
|
|
|
|
|
|
| 1 |
+
*.pyc
|
Dockerfile
ADDED
|
@@ -0,0 +1,24 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# Dockerfile untuk Hugging Face Spaces
|
| 2 |
+
# Build dan jalankan backend FastAPI saja
|
| 3 |
+
|
| 4 |
+
FROM python:3.10-slim
|
| 5 |
+
|
| 6 |
+
# Create non-root user (required by HF Spaces)
|
| 7 |
+
RUN useradd -m -u 1000 user
|
| 8 |
+
USER user
|
| 9 |
+
ENV PATH="/home/user/.local/bin:$PATH"
|
| 10 |
+
|
| 11 |
+
WORKDIR /app
|
| 12 |
+
|
| 13 |
+
# Copy requirements dan install dependencies
|
| 14 |
+
COPY --chown=user api/requirements.txt requirements.txt
|
| 15 |
+
RUN pip install --no-cache-dir --upgrade -r requirements.txt
|
| 16 |
+
|
| 17 |
+
# Copy folder api ke dalam container
|
| 18 |
+
COPY --chown=user api/ ./api/
|
| 19 |
+
|
| 20 |
+
# Expose port 7860 (default HF Spaces)
|
| 21 |
+
EXPOSE 7860
|
| 22 |
+
|
| 23 |
+
# Jalankan uvicorn dengan path module yang benar
|
| 24 |
+
CMD ["uvicorn", "api.index:app", "--host", "0.0.0.0", "--port", "7860"]
|
README.md
CHANGED
|
@@ -1,12 +1,27 @@
|
|
| 1 |
---
|
| 2 |
-
title: Sentimind
|
| 3 |
-
emoji:
|
| 4 |
-
colorFrom:
|
| 5 |
-
colorTo:
|
| 6 |
sdk: docker
|
|
|
|
| 7 |
pinned: false
|
| 8 |
-
license: mit
|
| 9 |
-
short_description: Backend API for Sentimind
|
| 10 |
---
|
| 11 |
|
| 12 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
---
|
| 2 |
+
title: Sentimind API
|
| 3 |
+
emoji: 🧠
|
| 4 |
+
colorFrom: orange
|
| 5 |
+
colorTo: yellow
|
| 6 |
sdk: docker
|
| 7 |
+
app_port: 7860
|
| 8 |
pinned: false
|
|
|
|
|
|
|
| 9 |
---
|
| 10 |
|
| 11 |
+
# Sentimind API Backend
|
| 12 |
+
|
| 13 |
+
Backend API untuk Sentimind - AI Personality Profiler.
|
| 14 |
+
|
| 15 |
+
## Endpoints
|
| 16 |
+
|
| 17 |
+
- `POST /api/predict` - Prediksi MBTI dari teks
|
| 18 |
+
- `POST /api/chat` - Chat dengan AI assistant
|
| 19 |
+
- `GET /api/quiz` - Get quiz questions
|
| 20 |
+
- `POST /api/quiz` - Submit quiz answers
|
| 21 |
+
- `GET /api/youtube/{video_id}` - Analyze YouTube video
|
| 22 |
+
|
| 23 |
+
## Environment Variables
|
| 24 |
+
|
| 25 |
+
Set these in HF Spaces Settings > Repository Secrets:
|
| 26 |
+
- `GEMINI_API_KEY` - Gemini API key (read by `api/core/chatbot.py`)
|
| 27 |
+
- `YOUTUBE_API_KEY` - YouTube Data API key
|
api/core/__init__.py
ADDED
|
File without changes
|
api/core/chatbot.py
ADDED
|
@@ -0,0 +1,44 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# api/core/chatbot.py
|
| 2 |
+
import os
|
| 3 |
+
import google.generativeai as genai
|
| 4 |
+
|
| 5 |
+
class MBTIChatbot:
    """Gemini-backed chat assistant for MBTI / mental-health questions.

    "Lite" version: no RAG / local knowledge base — answers come straight
    from the LLM's own knowledge.
    """

    def __init__(self):
        print("🚀 Initializing MBTI Chatbot (Lite Version)...")

        # 1. Set up Google Gemini.
        # Accept either variable name: the code historically read
        # GEMINI_API_KEY, while the deployment README documents
        # GOOGLE_API_KEY — support both so either secret works.
        api_key = os.getenv("GEMINI_API_KEY") or os.getenv("GOOGLE_API_KEY")
        if not api_key:
            print("⚠️ WARNING: GEMINI_API_KEY not found in .env.")
        else:
            genai.configure(api_key=api_key)

        try:
            # Use Gemini 2.0 Flash (standard).
            self.model = genai.GenerativeModel('gemini-2.0-flash')
        except Exception:
            # NOTE(review): model construction is client-side and rarely
            # raises; real availability errors surface at generate time.
            print("⚠️ 2.0 Flash failed, fallback to Lite")
            self.model = genai.GenerativeModel('gemini-2.0-flash-lite')

    def generate_response(self, user_query, lang="en"):
        """Answer *user_query* with the Gemini model.

        lang: "en" for English, anything else for casual Indonesian slang.
        Returns the model's text, or an apology string on any API error.
        """
        # Lite version: no RAG (local database) — rely on the LLM's broad
        # built-in knowledge.
        lang_instruction = "Answer in English." if lang == "en" else "Jawab dalam Bahasa Indonesia gaul (Slang Jakarta/Lo-Gue), maskulin, santai, dan to the point. Panggil user 'bro' atau 'bre'. JANGAN panggil 'bestie', 'kak', atau 'gan'. Gaya bicara tongkrongan cowok tapi tetap edukatif soal MBTI."

        system_prompt = f"""
        You are Sentimind AI, an expert in MBTI personality types and mental health.
        {lang_instruction}

        USER QUERY:
        {user_query}

        INSTRUCTIONS:
        - Answer directly based on your extensive knowledge about MBTI and Psychology.
        - Be empathetic, insightful, and use formatting (bullet points) if helpful.
        - Keep answers concise (under 200 words) unless asked for details.
        """
        try:
            response = self.model.generate_content(system_prompt)
            return response.text
        except Exception as e:
            return f"Maaf, saya sedang mengalami gangguan koneksi ke otak AI saya. (Error: {str(e)})"
|
api/core/nlp_handler.py
ADDED
|
@@ -0,0 +1,175 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import joblib
|
| 2 |
+
import os
|
| 3 |
+
import re
|
| 4 |
+
import requests
|
| 5 |
+
import numpy as np
|
| 6 |
+
import html
|
| 7 |
+
from deep_translator import GoogleTranslator
|
| 8 |
+
from youtube_transcript_api import YouTubeTranscriptApi
|
| 9 |
+
|
| 10 |
+
# --- CONFIG PATH ---
|
| 11 |
+
BASE_DIR = os.path.dirname(os.path.dirname(os.path.abspath(__file__)))
|
| 12 |
+
MBTI_PATH = os.path.join(BASE_DIR, 'data', 'model_mbti.pkl')
|
| 13 |
+
EMOTION_PATH = os.path.join(BASE_DIR, 'data', 'model_emotion.pkl')
|
| 14 |
+
|
| 15 |
+
_model_mbti = None
|
| 16 |
+
_model_emotion = None
|
| 17 |
+
|
| 18 |
+
EMOTION_TRANSLATIONS = {
|
| 19 |
+
'admiration': 'Kagum', 'amusement': 'Terhibur', 'anger': 'Marah',
|
| 20 |
+
'annoyance': 'Kesal', 'approval': 'Setuju', 'caring': 'Peduli',
|
| 21 |
+
'confusion': 'Bingung', 'curiosity': 'Penasaran', 'desire': 'Keinginan',
|
| 22 |
+
'disappointment': 'Kecewa', 'disapproval': 'Tidak Setuju', 'disgust': 'Jijik',
|
| 23 |
+
'embarrassment': 'Malu', 'excitement': 'Semangat', 'fear': 'Takut',
|
| 24 |
+
'gratitude': 'Bersyukur', 'grief': 'Berduka', 'joy': 'Gembira',
|
| 25 |
+
'love': 'Cinta', 'nervousness': 'Gugup', 'optimism': 'Optimis',
|
| 26 |
+
'pride': 'Bangga', 'realization': 'Sadar', 'relief': 'Lega',
|
| 27 |
+
'remorse': 'Menyesal', 'sadness': 'Sedih', 'surprise': 'Terkejut',
|
| 28 |
+
'neutral': 'Netral'
|
| 29 |
+
}
|
| 30 |
+
|
| 31 |
+
class NLPHandler:
    """NLP utilities: lazy model loading, translation, keyword extraction,
    MBTI/emotion prediction, and YouTube text acquisition."""

    @staticmethod
    def load_models():
        """Lazily load the MBTI and emotion pickles into module globals.

        Best-effort: a missing or unreadable pickle simply leaves the
        corresponding model as None instead of crashing the API.
        """
        global _model_mbti, _model_emotion
        if _model_mbti is None and os.path.exists(MBTI_PATH):
            try:
                _model_mbti = joblib.load(MBTI_PATH)
            except Exception:
                # Narrowed from a bare `except:` so Ctrl-C / SystemExit
                # are no longer swallowed.
                pass
        if _model_emotion is None and os.path.exists(EMOTION_PATH):
            try:
                _model_emotion = joblib.load(EMOTION_PATH)
            except Exception:
                pass

    @staticmethod
    def translate_to_english(text):
        """Translate *text* to English; return the input unchanged on failure."""
        try:
            # Keep payload under the translator's size limit.
            if len(text) > 4500:
                text = text[:4500]
            return GoogleTranslator(source='auto', target='en').translate(text)
        except Exception:
            return text

    @staticmethod
    def extract_keywords(text):
        """Return the 5 most frequent non-stopword words (>3 chars) as
        {"en": [...], "id": [...]}; the Indonesian list falls back to the
        English one when translation fails."""
        stopwords = ["the", "and", "is", "to", "in", "it", "of", "for", "with", "on", "that", "this", "my", "was", "as", "are", "have", "you", "but", "so", "ini", "itu", "dan", "yang", "di", "ke"]
        words = re.findall(r'\w+', text.lower())
        filtered = [w for w in words if len(w) > 3 and w not in stopwords]
        freq = {}
        for w in filtered:
            freq[w] = freq.get(w, 0) + 1
        sorted_words = sorted(freq.items(), key=lambda x: x[1], reverse=True)

        keywords_en = [w[0] for w in sorted_words[:5]]
        keywords_id = []
        try:
            translator = GoogleTranslator(source='auto', target='id')
            for k in keywords_en:
                keywords_id.append(translator.translate(k))
        except Exception:
            keywords_id = keywords_en
        return {"en": keywords_en, "id": keywords_id}

    @staticmethod
    def predict_all(raw_text):
        """Run MBTI + emotion prediction and keyword extraction on raw text.

        Returns {"mbti": str, "emotion": {"id", "en", "raw"}, "keywords": dict}.
        Models that are missing or fail leave their default placeholders.
        """
        NLPHandler.load_models()
        processed_text = NLPHandler.translate_to_english(raw_text)

        mbti_result = "UNKNOWN"
        if _model_mbti is not None:
            try:
                mbti_result = _model_mbti.predict([processed_text])[0]
            except Exception:
                pass

        emotion_data = {"id": "Kompleks", "en": "Complex", "raw": "unknown"}
        if _model_emotion is not None:
            try:
                pred_label = "neutral"
                if hasattr(_model_emotion, "predict_proba"):
                    probs = _model_emotion.predict_proba([processed_text])[0]
                    classes = _model_emotion.classes_
                    # Suppress a weak "neutral" prediction (< 0.65) so a
                    # more specific emotion can win.
                    neutral_indices = [i for i, c in enumerate(classes) if c.lower() == 'neutral']
                    if neutral_indices:
                        idx = neutral_indices[0]
                        if probs[idx] < 0.65:
                            probs[idx] = 0.0
                    if np.sum(probs) > 0:
                        best_idx = np.argmax(probs)
                        pred_label = classes[best_idx]
                    else:
                        pred_label = _model_emotion.predict([processed_text])[0]
                else:
                    pred_label = _model_emotion.predict([processed_text])[0]

                indo_label = EMOTION_TRANSLATIONS.get(pred_label, pred_label.capitalize())
                emotion_data = {"id": indo_label, "en": pred_label.capitalize(), "raw": pred_label}
            except Exception:
                pass

        return {
            "mbti": mbti_result,
            "emotion": emotion_data,
            "keywords": NLPHandler.extract_keywords(processed_text)
        }

    # --- OFFICIAL ROUTE: YOUTUBE DATA API ---
    @staticmethod
    def _fetch_official_api(video_id, api_key):
        """Fetch title/description and top comments via the official
        YouTube Data API. Returns joined text, or None on failure."""
        print(f"🔑 Using Official API Key for {video_id}...")
        text_parts = []

        try:
            # 1. Fetch metadata
            url_meta = f"https://www.googleapis.com/youtube/v3/videos?part=snippet&id={video_id}&key={api_key}"
            res_meta = requests.get(url_meta, timeout=5)

            if res_meta.status_code == 200:
                data = res_meta.json()
                if "items" in data and len(data["items"]) > 0:
                    snippet = data["items"][0]["snippet"]
                    # Unescape so &quot; becomes " and &#39; becomes '
                    title = html.unescape(snippet['title'])
                    desc = html.unescape(snippet['description'])
                    text_parts.append(f"Title: {title}")
                    text_parts.append(f"Description: {desc}")

            # 2. Fetch comments
            url_comm = f"https://www.googleapis.com/youtube/v3/commentThreads?part=snippet&videoId={video_id}&maxResults=30&order=relevance&key={api_key}"
            res_comm = requests.get(url_comm, timeout=5)

            if res_comm.status_code == 200:
                data = res_comm.json()
                comments = []
                for item in data.get("items", []):
                    raw_comm = item["snippet"]["topLevelComment"]["snippet"]["textDisplay"]
                    # Strip HTML tags like <b> <br>
                    clean_comm = re.sub(r'<[^>]+>', '', raw_comm)
                    # Decode entities such as &quot; &#39;
                    clean_comm = html.unescape(clean_comm)
                    comments.append(clean_comm)

                if comments:
                    text_parts.append("\n\n--- Top Comments (Community Vibe) ---\n")
                    text_parts.extend(comments)

            if not text_parts:
                return None

            return "\n\n".join(text_parts)

        except Exception as e:
            print(f"❌ Official API Error: {e}")
            return None

    @staticmethod
    def fetch_youtube_transcript(video_id):
        """Get analyzable text for a video: official API first, transcript
        scraping as fallback. Returns a string, or None if both fail."""
        # 1. FIRST CHOICE: use the official API when a key is configured
        api_key = os.getenv("YOUTUBE_API_KEY")

        if api_key:
            official_data = NLPHandler._fetch_official_api(video_id, api_key)
            if official_data:
                return official_data

        # 2. SECOND CHOICE: scrape the transcript
        print(f"🎬 Fetching transcript (fallback) for: {video_id}")
        try:
            # NOTE(review): get_transcript() is removed in
            # youtube-transcript-api >= 1.0 — pin the dependency in
            # requirements.txt or migrate to YouTubeTranscriptApi().fetch().
            transcript_list = YouTubeTranscriptApi.get_transcript(video_id, languages=['id', 'en', 'en-US'])
            full_text = " ".join([item['text'] for item in transcript_list])
            # Drop bracketed cues like [Music] / (applause)
            clean_text = re.sub(r'\[.*?\]|\(.*?\)', '', full_text).strip()
            # Unescape scraped text as well
            return html.unescape(clean_text)
        except Exception:
            pass

        return None
|
api/core/quiz_logic.py
ADDED
|
@@ -0,0 +1,54 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import json
|
| 2 |
+
import os
|
| 3 |
+
|
| 4 |
+
# --- CONFIG PATH ---
|
| 5 |
+
# Mengambil path folder "api"
|
| 6 |
+
BASE_DIR = os.path.dirname(os.path.dirname(os.path.abspath(__file__)))
|
| 7 |
+
# Mengarah ke api/data/questions.json
|
| 8 |
+
DB_PATH = os.path.join(BASE_DIR, 'data', 'questions.json')
|
| 9 |
+
|
| 10 |
+
class QuizHandler:
    """Reads quiz questions from the JSON database and scores MBTI results."""

    @staticmethod
    def get_questions():
        """Return every question from the JSON database ([] on any failure)."""
        try:
            if not os.path.exists(DB_PATH):
                return []
            with open(DB_PATH, 'r') as handle:
                return json.load(handle)
        except Exception as e:
            print(f"Error reading quiz db: {e}")
            return []

    @staticmethod
    def calculate_mbti(answers):
        """Compute the MBTI type from the user's answers.

        *answers* maps question id (str) to a Likert value from -3 to 3,
        e.g. {"1": 2, "2": -1}. Returns "UNKNOWN" when no questions load.
        """
        questions = QuizHandler.get_questions()
        if not questions:
            return "UNKNOWN"

        # Running score per dimension, starting balanced at 0.
        # Positive leans E/S/T/J; negative leans I/N/F/P.
        totals = {'EI': 0, 'SN': 0, 'TF': 0, 'JP': 0}

        for question in questions:
            key = str(question['id'])
            if key in answers:
                # User value (-3..3) times the question direction (+1/-1).
                # Example: an introvert-phrased item (direction -1) answered
                # "strongly agree" (3) contributes -3, moving toward I.
                totals[question['dimension']] += int(answers[key]) * question['direction']

        pairs = (('EI', 'E', 'I'), ('SN', 'S', 'N'), ('TF', 'T', 'F'), ('JP', 'J', 'P'))
        return ''.join(pos if totals[dim] >= 0 else neg for dim, pos, neg in pairs)
|
api/data/model_emotion.pkl
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:8aedddc9609c31c78f5b2d169962e1bc97bfe228933986373a51df620e37f4a7
|
| 3 |
+
size 3145820
|
api/data/model_mbti.pkl
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:058d5de1e06f1c305e133eceb4a62a6c2b18a304fc16dd6866ef315eefe10b9a
|
| 3 |
+
size 2497720
|
api/data/questions.json
ADDED
|
@@ -0,0 +1,58 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
[
|
| 2 |
+
{
|
| 3 |
+
"id": 1,
|
| 4 |
+
"text_id": "Saya merasa lebih berenergi setelah bergaul dengan banyak orang.",
|
| 5 |
+
"text_en": "I feel more energized after socializing with a large group of people.",
|
| 6 |
+
"dimension": "EI",
|
| 7 |
+
"direction": 1
|
| 8 |
+
},
|
| 9 |
+
{
|
| 10 |
+
"id": 2,
|
| 11 |
+
"text_id": "Saya lebih suka fokus pada fakta nyata daripada ide abstrak.",
|
| 12 |
+
"text_en": "I prefer to focus on real facts rather than abstract ideas.",
|
| 13 |
+
"dimension": "SN",
|
| 14 |
+
"direction": 1
|
| 15 |
+
},
|
| 16 |
+
{
|
| 17 |
+
"id": 3,
|
| 18 |
+
"text_id": "Saya mengambil keputusan berdasarkan logika, bukan perasaan.",
|
| 19 |
+
"text_en": "I make decisions based on logic rather than feelings.",
|
| 20 |
+
"dimension": "TF",
|
| 21 |
+
"direction": 1
|
| 22 |
+
},
|
| 23 |
+
{
|
| 24 |
+
"id": 4,
|
| 25 |
+
"text_id": "Saya suka membuat rencana detail sebelum melakukan sesuatu.",
|
| 26 |
+
"text_en": "I like to have a detailed plan before doing anything.",
|
| 27 |
+
"dimension": "JP",
|
| 28 |
+
"direction": 1
|
| 29 |
+
},
|
| 30 |
+
{
|
| 31 |
+
"id": 5,
|
| 32 |
+
"text_id": "Saya sering merasa lelah jika harus bersosialisasi terlalu lama.",
|
| 33 |
+
"text_en": "I often feel drained if I have to socialize for too long.",
|
| 34 |
+
"dimension": "EI",
|
| 35 |
+
"direction": -1
|
| 36 |
+
},
|
| 37 |
+
{
|
| 38 |
+
"id": 6,
|
| 39 |
+
"text_id": "Saya sering membayangkan masa depan dan kemungkinan-kemungkinannya.",
|
| 40 |
+
"text_en": "I often imagine the future and its possibilities.",
|
| 41 |
+
"dimension": "SN",
|
| 42 |
+
"direction": -1
|
| 43 |
+
},
|
| 44 |
+
{
|
| 45 |
+
"id": 7,
|
| 46 |
+
"text_id": "Saya mudah tersentuh secara emosional oleh cerita orang lain.",
|
| 47 |
+
"text_en": "I am easily emotionally moved by other people's stories.",
|
| 48 |
+
"dimension": "TF",
|
| 49 |
+
"direction": -1
|
| 50 |
+
},
|
| 51 |
+
{
|
| 52 |
+
"id": 8,
|
| 53 |
+
"text_id": "Saya lebih suka bertindak spontan daripada mengikuti jadwal kaku.",
|
| 54 |
+
"text_en": "I prefer to be spontaneous rather than following a rigid schedule.",
|
| 55 |
+
"dimension": "JP",
|
| 56 |
+
"direction": -1
|
| 57 |
+
}
|
| 58 |
+
]
|
api/index.py
ADDED
|
@@ -0,0 +1,97 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import os

from fastapi import FastAPI
from fastapi.middleware.cors import CORSMiddleware
from pydantic import BaseModel
from dotenv import load_dotenv

from .core.nlp_handler import NLPHandler

# Load environment variables from .env before anything reads them.
load_dotenv()

from api.predict import predict_endpoint
from api.quiz import get_quiz_questions, submit_quiz
from api.core.chatbot import MBTIChatbot

# Initialize the chatbot once at module load.
chatbot = MBTIChatbot()


class ChatRequest(BaseModel):
    """Payload for POST /api/chat."""
    message: str
    lang: str = "id"  # Defaults to Indonesian when the client omits it.


app = FastAPI()

# CORS so the frontend (port 3000) can reach the backend (port 8000).
app.add_middleware(
    CORSMiddleware,
    allow_origins=["*"],  # In production, replace "*" with the frontend domain.
    allow_credentials=True,
    allow_methods=["*"],
    allow_headers=["*"],
)


# --- Startup debugging: report which YouTube fetch mode is active ---
@app.on_event("startup")
async def startup_event():
    api_key = os.getenv("YOUTUBE_API_KEY")
    print("\n" + "=" * 40)
    if api_key:
        print(f"✅ API KEY DITEMUKAN: {api_key[:5]}...******")
        print("🚀 Mode: OFFICIAL API (Anti-Blokir)")
    else:
        print("❌ API KEY TIDAK DITEMUKAN!")
        print("⚠️ Mode: FALLBACK SCRAPING (Rawan Error)")
    print("=" * 40 + "\n")

# Routes implemented in sibling modules.
app.add_api_route("/api/predict", predict_endpoint, methods=["POST"])
app.add_api_route("/api/quiz", get_quiz_questions, methods=["GET"])
app.add_api_route("/api/quiz", submit_quiz, methods=["POST"])


@app.post("/api/chat")
async def chat_endpoint(request: ChatRequest):
    return {"response": chatbot.generate_response(request.message, request.lang)}


@app.get("/api/hello")
def health_check():
    # Reachable from a browser: http://localhost:8000/api/hello
    return {
        "status": "online",
        "mode": "youtube_ready",
        "api_key_detected": bool(os.getenv("YOUTUBE_API_KEY")),
    }


# --- YouTube analysis route ---
@app.get("/api/youtube/{video_id}")
def analyze_youtube_video(video_id: str):
    """Fetch text for a video and run the full NLP analysis on it."""
    text = NLPHandler.fetch_youtube_transcript(video_id)

    if not text:
        return {
            "success": False,
            "error": "NO_TRANSCRIPT"  # Error code when no subtitles/metadata exist
        }

    result = NLPHandler.predict_all(text)

    response_data = {
        "success": True,
        "mbti_type": result["mbti"],
        "emotion": result["emotion"],
        "keywords": result["keywords"],
    }

    # NOTE(review): fetch_youtube_transcript currently returns str or None,
    # so this dict branch looks dormant; kept for a richer future payload.
    if isinstance(text, dict) and "meta" in text:
        response_data["fetched_text"] = text["text_for_analysis"]
        response_data["meta"] = text["meta"]
    else:
        response_data["fetched_text"] = text

    return response_data
|
api/predict.py
ADDED
|
@@ -0,0 +1,25 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
from fastapi import FastAPI, HTTPException
from pydantic import BaseModel
# Core NLP logic
from .core.nlp_handler import NLPHandler

app = FastAPI()


class UserInput(BaseModel):
    """Payload for POST /api/predict."""
    text: str


@app.post("/api/predict")
def predict_endpoint(input_data: UserInput):
    """Run the NLP pipeline (auto-translate -> predict) on the given text."""
    if not input_data.text:
        raise HTTPException(status_code=400, detail="No text provided")

    result = NLPHandler.predict_all(input_data.text)

    # JSON response shape consumed by the frontend.
    return {
        "success": True,
        "mbti_type": result["mbti"],
        "emotion": result["emotion"],
        "keywords": result["keywords"]
    }
|
api/quiz.py
ADDED
|
@@ -0,0 +1,26 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
from fastapi import FastAPI, HTTPException
from pydantic import BaseModel
from typing import Dict
# Core quiz logic
from .core.quiz_logic import QuizHandler

app = FastAPI()


class QuizSubmission(BaseModel):
    """Answers posted by the frontend, e.g. {"1": 3, "2": -2}."""
    answers: Dict[str, int]


@app.get("/api/quiz")
def get_quiz_questions():
    """Endpoint for the frontend to fetch the quiz questions."""
    questions = QuizHandler.get_questions()
    if not questions:
        # JSON database missing or unreadable.
        raise HTTPException(status_code=500, detail="Database soal tidak ditemukan")
    return {"questions": questions}


@app.post("/api/quiz")
def submit_quiz(submission: QuizSubmission):
    """Endpoint for the frontend to submit answers and get the MBTI result."""
    return {"mbti": QuizHandler.calculate_mbti(submission.answers)}
|
api/requirements.txt
ADDED
|
@@ -0,0 +1,11 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
fastapi
|
| 2 |
+
uvicorn
|
| 3 |
+
python-dotenv
|
| 4 |
+
pydantic
|
| 5 |
+
numpy
|
| 6 |
+
scikit-learn
|
| 7 |
+
joblib
|
| 8 |
+
deep-translator
|
| 9 |
+
requests
|
| 10 |
+
youtube-transcript-api
|
| 11 |
+
google-generativeai
|