Commit 60ebaee
Parent(s): a8bcefb
Initial Docker Space

Files changed:
- app.py (+114 -52)
- requirements.txt (+1 -1)
app.py CHANGED

@@ -24,62 +24,124 @@ CORS(app)
 AUDIO_FOLDER = os.path.join(dir_path, 'static', 'audio')
 os.makedirs(AUDIO_FOLDER, exist_ok=True)
 
-# Load language detection model
-lid_model = fasttext.load_model(
-    hf_hub_download("doublesizebed/predict_malay_en", "lid_ms_en.bin")
-)
-
-def tokenize(text):
-    tokens = text.lower().split()
-    return [t.strip(string.punctuation) for t in tokens if t.strip(string.punctuation)]
-
-def detect_lang(token):
-    label, _ = lid_model.predict(token)
-    return label[0].replace("__label__", "").upper()
-
-# G2P models
-g2p_ms_tokenizer = AutoTokenizer.from_pretrained("doublesizebed/G2P_malay")
-g2p_ms_model = AutoModelForSeq2SeqLM.from_pretrained("doublesizebed/G2P_malay").to('cuda' if torch.cuda.is_available() else 'cpu')
-g2p_eng = make_g2p("eng", "eng-ipa")
-
-def predict_phonemes(word, lang):
-    if lang == "MS":
-        inputs = g2p_ms_tokenizer(word, return_tensors="pt", padding=True, truncation=True)
-        inputs = inputs.to(g2p_ms_model.device)
-        outputs = g2p_ms_model.generate(**inputs)
-        return g2p_ms_tokenizer.decode(outputs[0], skip_special_tokens=True)
-    else:
-        tg = g2p_eng(word)
-        return ' '.join(tg.to_sequence())
-
-# Chatbot setup
 class ChatBot:
     def __init__(self):
+        self.chat_history_ids = None
+        self.bot_input_ids = None
         self.device = 'cuda' if torch.cuda.is_available() else 'cpu'
-
-        self.model = AutoModelForCausalLM.from_pretrained(
-            …
+        self.tokenizer = AutoTokenizer.from_pretrained("TinyLlama/TinyLlama-1.1B-Chat-v1.0")
+        self.model = AutoModelForCausalLM.from_pretrained("TinyLlama/TinyLlama-1.1B-Chat-v1.0").to(self.device)
+
+        try:
+            nltk.data.find('corpora/brown')
+        except LookupError:
+            nltk.download('brown')
+
+        try:
+            nltk.data.find('tokenizers/punkt')
+            nltk.data.find('tokenizers/punkt_tab')
+        except LookupError:
+            nltk.download('punkt')
+            nltk.download('punkt_tab')
+
+        # Parler-TTS Setup
+        self.tts_model = ParlerTTSForConditionalGeneration.from_pretrained("doublesizebed/parler-tts-mini-malay").to(self.device)
+        self.tts_tokenizer = AutoTokenizer.from_pretrained("C:/Users/Honor/app/model")
+        self.description_tokenizer = AutoTokenizer.from_pretrained(self.tts_model.config.text_encoder._name_or_path)
+
+    async def get_response(self, user_input, gender):
+        def build_prompt(user_question):
+            # 1) Mandate at top
+            instructions = (
+                "Never introduce yourself. "
+                "After your concise answer, ask exactly one relevant follow-up question.\n\n"
+            )
+            # 2) Few-shot examples
+            demos = (
+                "Q: What is photosynthesis?\n"
+                "Answer: Photosynthesis lets plants convert sunlight into energy. Which plants interest you most?\n\n"
+                "Q: How do I make tea?\n"
+                "Answer: Steep tea leaves in hot water for 3–5 minutes, then serve. Do you prefer green or black tea?\n\n"
+            )
+            # 3) The actual user query
+            query = f"Q: {user_question}\nAnswer:"
+            return instructions + demos + query
+
+        full_prompt = build_prompt(user_input)
+        prompt_ids = self.tokenizer(full_prompt, return_tensors="pt").input_ids.to(self.device)
+
+        if self.chat_history_ids is None:
+            self.chat_history_ids = prompt_ids
+        else:
+            self.chat_history_ids = torch.cat([self.chat_history_ids, prompt_ids], dim=-1)
+
+        output = self.model.generate(
+            self.chat_history_ids,
+            max_length=self.chat_history_ids.shape[-1] + 128,
+            pad_token_id=self.tokenizer.pad_token_id,
+            do_sample=True,
+            temperature=0.5,
+            top_p=0.9,
+            top_k=50,
+            eos_token_id=self.tokenizer.eos_token_id,
         )
-        # NLTK
-        nltk.download('brown')
-        nltk.download('punkt')
-        nltk.download('averaged_perceptron_tagger')
 
-
-
-
-
-
+        # update history so next turn continues the convo
+        self.chat_history_ids = output
+
+        generated_text = self.tokenizer.decode(output[0], skip_special_tokens=True)
+        # Remove the prompt if it's echoed back
+        if generated_text.startswith(full_prompt):
+            generated_text = generated_text[len(full_prompt):].strip()
+
+        def clean_response(text):
+            cleaned_text = re.sub(r"(?m)^(Q:|Answer:).*\n?", "", text)
+            return cleaned_text.strip()
+
+        final_text = clean_response(generated_text)
+
+        blob = TextBlob(final_text)
+        nouns = blob.noun_phrases
+
+        masked_sentence = final_text
+        for i, noun in enumerate(nouns):
+            placeholder = f"<<<noun_{i}>>>"
+            masked_sentence = re.sub(re.escape(noun), placeholder, masked_sentence, flags=re.IGNORECASE)
+
+        translated_masked_sentence = GoogleTranslator(source='en', target='ms').translate(masked_sentence)
+
+        def restore_placeholders(text, nouns_list):
+            def replacer(match):
+                index = int(match.group(1))
+                return nouns_list[index]
+            return re.sub(r"<<<\s*noun_(\d+)\s*>>>", replacer, text, flags=re.IGNORECASE)
+
+        final_sentence = restore_placeholders(translated_masked_sentence, nouns)
+
+        audio_file_path = await self.text_to_speech(final_sentence, gender)
+
+        return final_sentence, audio_file_path
+
+    async def text_to_speech(self, text, gender):
+        if gender.lower() == "male":
+            description = "A male speaker delivers a slightly expressive and animated speech with a moderate speed and pitch."
+        else:
+            description = "A female speaker delivers a slightly expressive and animated speech with a moderate speed and pitch."
+
+        desc_inputs = self.description_tokenizer(description, return_tensors="pt", padding=True).to(self.device)
+        text_inputs = self.tts_tokenizer(text, return_tensors="pt", padding=True).to(self.device)
+
+        generation = self.tts_model.generate(
+            input_ids=desc_inputs.input_ids,
+            attention_mask=desc_inputs.attention_mask,
+            prompt_input_ids=text_inputs.input_ids,
+            prompt_attention_mask=text_inputs.attention_mask
+        )
+        audio_arr = generation.cpu().numpy().squeeze()
+        output_filename = f"response.wav"
+        output_path = os.path.join(AUDIO_FOLDER, output_filename)
+        sf.write(output_path, audio_arr, self.tts_model.config.sampling_rate)
+        return output_filename
 
 chatbot = ChatBot()
 

@@ -87,7 +149,7 @@ chatbot = ChatBot()
 def chat_endpoint():
     data = request.get_json()
     user_text = data.get('message', '')
-    gender = data.get('gender', '
+    gender = data.get('gender', '')
     if not user_text:
         return jsonify({"error": "Empty message"}), 400
     resp_text, wav_name = asyncio.run(chatbot.chat(user_text, gender))
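For reference, a minimal client for the endpoint above. This is a sketch only: the @app.route decorator sits between the two hunks and is not shown, so the path ("/chat") and port (7860, the usual Docker Space default) are assumptions.

    import requests

    # Hypothetical URL: the actual route and port are not visible in this diff.
    resp = requests.post(
        "http://localhost:7860/chat",
        json={"message": "How do I make tea?", "gender": "female"},
    )
    print(resp.json())  # expected: the Malay reply text plus the generated .wav filename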
requirements.txt CHANGED

@@ -5,7 +5,7 @@ transformers>=4.30
 torch
 fasttext
 deep-translator
-textblob
+textblob==0.17.1
 parler-tts
 soundfile
 nltk
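The mask/translate/restore step inside get_response can be exercised on its own. Below is a minimal sketch against the dependencies pinned here; it assumes the brown and punkt corpora are already downloaded (which __init__ now ensures) and that the Google Translate backend is reachable.

    import re
    from textblob import TextBlob
    from deep_translator import GoogleTranslator

    text = "Photosynthesis lets plants convert sunlight into energy."
    nouns = TextBlob(text).noun_phrases  # noun_phrases is what needs brown/punkt

    # Mask each noun phrase so the translator leaves it untouched.
    masked = text
    for i, noun in enumerate(nouns):
        masked = re.sub(re.escape(noun), f"<<<noun_{i}>>>", masked, flags=re.IGNORECASE)

    translated = GoogleTranslator(source='en', target='ms').translate(masked)

    # Restore the original English noun phrases in the Malay sentence.
    restored = re.sub(r"<<<\s*noun_(\d+)\s*>>>", lambda m: nouns[int(m.group(1))], translated)
    print(restored)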