Mitul6299 committed
Commit 5e466b4 · verified · 1 Parent(s): ad092cb

Upload 2 files

Files changed (2)
  1. app.py +128 -0
  2. requirements.txt +17 -0
app.py ADDED
@@ -0,0 +1,128 @@
+ import os
+ import re
+ import torch
+ import numpy as np
+ import pandas as pd
+ import faiss
+ import base64
+ import tempfile
+ import speech_recognition as sr
+ from gtts import gTTS
+ from io import BytesIO
+ from PIL import Image
+ from sentence_transformers import SentenceTransformer
+ from transformers import (
+     AutoTokenizer, AutoModelForCausalLM, BitsAndBytesConfig,
+     pipeline, AutoFeatureExtractor, AutoModelForAudioClassification,
+     AutoImageProcessor, AutoModelForImageClassification,
+     AutoModelForSequenceClassification
+ )
+ import gradio as gr
+ 
+ # Device setup
+ device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
+ 
+ # Model loading (global, runs on app start)
+ PRIMARY_MODEL = "tiiuae/falcon-rw-1b"
+ quantization_config = BitsAndBytesConfig(
+     load_in_4bit=True, bnb_4bit_quant_type="nf4", bnb_4bit_compute_dtype=torch.float16
+ )
+ tokenizer = AutoTokenizer.from_pretrained(PRIMARY_MODEL)
+ model = AutoModelForCausalLM.from_pretrained(
+     PRIMARY_MODEL, device_map="auto", quantization_config=quantization_config
+ )
+ 
+ # Sentiment, emotion, ABSA, etc. (load all pipelines as in notebook)
+ # pipeline() wants an int device index (0 = first GPU, -1 = CPU);
+ # torch.device("cuda").index is None here, so it can't be passed directly
+ pipe_device = 0 if torch.cuda.is_available() else -1
+ sentiment_pipe = pipeline("sentiment-analysis", model="cardiffnlp/twitter-roberta-base-sentiment-latest", device=pipe_device)
+ emotion_pipe = pipeline("text-classification", model="j-hartmann/emotion-english-distilroberta-base", return_all_scores=True, device=pipe_device)
+ absa_pipe = pipeline("text-classification", model="yangheng/deberta-v3-base-absa-v1.1", device=pipe_device)
+ 
+ # Embed model for safety/RAG
+ embed_model = SentenceTransformer("all-MiniLM-L6-v2", device=device)
+ 
+ # Safety model
+ safety_tokenizer = AutoTokenizer.from_pretrained("unitary/toxic-bert")
+ safety_model = AutoModelForSequenceClassification.from_pretrained("unitary/toxic-bert").to(device)
+ 
+ # Voice emotion
+ feature_extractor = AutoFeatureExtractor.from_pretrained("superb/hubert-base-superb-er")
+ ser_model = AutoModelForAudioClassification.from_pretrained("superb/hubert-base-superb-er").to(device)
+ 
+ # Facial emotion
+ face_processor = AutoImageProcessor.from_pretrained("dima806/facial_emotions_image_detection")
+ face_model = AutoModelForImageClassification.from_pretrained("dima806/facial_emotions_image_detection").to(device)
+ 
+ # RAG setup
+ RAG_XLSX_PATH = "https://raw.githubusercontent.com/Mitul060299/Hackathon/main/RAG_Knowledge_Base_WithID.xlsx"
+ rag_df = pd.read_excel(RAG_XLSX_PATH)
+ documents = rag_df["Knowledge Entry"].dropna().astype(str).tolist()
+ doc_ids = rag_df["ID"].dropna().astype(str).tolist() if "ID" in rag_df.columns else [str(i) for i in range(len(documents))]
+ doc_embeddings = embed_model.encode(documents, convert_to_numpy=True, normalize_embeddings=True)
+ dim = doc_embeddings.shape[1]
+ index = faiss.IndexFlatIP(dim)
+ index.add(doc_embeddings)
+ 
+ # Safety keywords/embeddings (as in notebook)
+ unsafe_keywords = ["suicide", "kill myself", "self harm", "hurt myself", "end my life", "overdose", "cutting", "hang myself", "can't go on", "want to die", "give up on life", "life is pointless", "i see no future", "end it all"]
+ unsafe_emb = embed_model.encode(unsafe_keywords, convert_to_tensor=True)
+ CRISIS_MESSAGE = "💛 I'm concerned about your safety. I can't assist with that here. Please contact local emergency services or a crisis helpline right now.\n\nIf in India: AASRA +91-9820466726\nUS: 988\nUK: Samaritans 116 123"
+ 
+ # Aspect keywords (from notebook)
+ _ASPECT_KEYWORDS = {
+     'girlfriend','boyfriend','partner','husband','wife','relationship','marriage','heartbreak','breakup','divorce',
+     'family','mother','father','parent','sibling','friend',
+     'job','career','work','boss','manager','colleague','layoff','termination','unemployment','job loss',
+     'study','school','college','university','exam','test','marks','grades','education',
+     'depression','depressed','anxiety','stressed','stress','fear','worry','lonely','isolation',
+     'sad','sadness','grief','loss','trauma','hopeless','confused',
+     'angry','anger','frustrated','irritated',
+     'health','illness','sick','tired','fatigue','disease','mental health','therapy','counseling',
+     'change','moving','transition'
+ }
+ 
+ # All functions from the notebook (generate_from_model, detect_sentiment, detect_text_emotion, detect_absa, is_unsafe_message, soft_duplicate_filter, retrieve_docs, detect_voice_emotion, detect_facial_emotion, detect_intent, generate_contextual_response, build_prompt_enhanced, generate_response_pipeline_enhanced)
+ # ... (Paste the full definitions from the notebook here; they are omitted for brevity. They run from generate_from_model in Cell 4 through generate_response_pipeline_enhanced in Cell 14.)
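+ 
+ # NOTE: hypothetical sketches of two of the omitted functions, shown so the
+ # pipeline's shape is visible. The names come from the notebook, but these
+ # bodies, thresholds, and top_k values are assumptions, not its actual code.
+ from sentence_transformers import util  # cosine-similarity helper
+ 
+ def retrieve_docs(query, top_k=3):
+     # Embed the query the same way the documents were embedded, then run
+     # top-k inner-product search against the FAISS index built above
+     q_emb = embed_model.encode([query], convert_to_numpy=True, normalize_embeddings=True)
+     _scores, idxs = index.search(q_emb, top_k)
+     return [documents[i] for i in idxs[0] if i != -1]
+ 
+ def is_unsafe_message(text, threshold=0.6):
+     # Flag crisis language by keyword match first, then by cosine similarity
+     # against the precomputed unsafe_emb keyword embeddings
+     lowered = text.lower()
+     if any(k in lowered for k in unsafe_keywords):
+         return True
+     text_emb = embed_model.encode(text, convert_to_tensor=True)
+     return bool(util.cos_sim(text_emb, unsafe_emb).max().item() >= threshold)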
+ 
+ # Global history for duplicate filter
+ _previous_responses = []
+ 
+ # Gradio chatbot function
+ def chatbot_fn(message, history, audio, image):
+     history = history or []
+     prev_user_messages = [h[0] for h in history]  # User turns from (user, bot) tuples
+ 
+     user_text = message
+     voice_path = audio
+     face_path = image
+ 
+     # Transcribe the mic recording only when no text was typed
+     if audio and not user_text:
+         recognizer = sr.Recognizer()
+         with sr.AudioFile(audio) as source:
+             audio_data = recognizer.record(source)
+         user_text = recognizer.recognize_google(audio_data)
+ 
+     reply, te, tes, sent, aspects = generate_response_pipeline_enhanced(
+         user_text, prev_user_messages, voice_audio_path=voice_path, face_image_path=face_path
+     )
+ 
+     # TTS for voice output: gr.Audio expects a filepath (or numpy array),
+     # not a BytesIO, so save the MP3 to a temp file and return its path
+     tts = gTTS(reply)
+     tts_file = tempfile.NamedTemporaryFile(suffix=".mp3", delete=False)
+     tts.save(tts_file.name)
+ 
+     history.append((user_text, reply))
+     return history, tts_file.name
+ 
+ # Gradio interface
+ with gr.Blocks() as demo:
+     gr.Markdown("# Mental Health Chatbot")
+     chatbot = gr.Chatbot()
+     msg = gr.Textbox(placeholder="Type your message or use mic/webcam...")
+     # Gradio 4.x API; on 3.x these take source="microphone" / source="webcam"
+     audio_in = gr.Audio(sources=["microphone"], type="filepath", label="Speak (optional)")
+     image_in = gr.Image(sources=["webcam"], type="filepath", label="Webcam (optional)")
+     audio_out = gr.Audio(label="Bot Response (Voice)", autoplay=True)
+ 
+     # chatbot_fn returns the updated history plus a TTS filepath; then clear
+     # the textbox rather than overwriting it with the reply
+     msg.submit(
+         chatbot_fn, [msg, chatbot, audio_in, image_in], [chatbot, audio_out]
+     ).then(lambda: "", None, msg, queue=False)
+ 
+ demo.launch()
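
One caveat on the interface above: gr.Audio(sources=["microphone"]) and gr.Image(sources=["webcam"]) are the Gradio 4.x API, while requirements.txt leaves gradio unpinned. A minimal guard such as the sketch below fails fast on a mismatched install (the 4.0 boundary is an assumption based on when Gradio replaced source= with sources=; packaging is already pulled in by transformers):

import gradio as gr
from packaging import version

# The Blocks above use the 4.x sources=[...] keyword; Gradio 3.x installs
# expect source="microphone" / source="webcam" instead
if version.parse(gr.__version__) < version.parse("4.0"):
    raise RuntimeError(
        f"Gradio {gr.__version__} installed; pin gradio>=4.0 in requirements.txt "
        "or switch the components back to the 3.x source= keyword"
    )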
requirements.txt ADDED
@@ -0,0 +1,17 @@
+ torch
+ transformers
+ bitsandbytes
+ accelerate
+ sentencepiece
+ sentence-transformers
+ numpy
+ faiss-cpu
+ pandas
+ openpyxl
+ librosa
+ soundfile
+ speechrecognition
+ gtts  # For TTS output
+ gradio
+ pyaudio  # For audio handling (if needed)
+ opencv-python-headless  # For any image processing (cv2)