doublesizebed committed on
Commit
47ab7c3
·
1 Parent(s): cde87a2
Files changed (3) hide show
  1. Dockerfile +4 -0
  2. app.py +10 -4
  3. requirements.txt +3 -2
Dockerfile CHANGED
@@ -5,6 +5,10 @@ RUN apt-get update && apt-get install -y \
5
  ffmpeg git build-essential python3-dev && \
6
  rm -rf /var/lib/apt/lists/*
7
 
 
 
 
 
8
  # Set working directory
9
  WORKDIR /app/chatbot
10
 
 
5
  ffmpeg git build-essential python3-dev && \
6
  rm -rf /var/lib/apt/lists/*
7
 
8
+ # Install CPU-specific PyTorch (wheel from PyTorch index)
9
+ RUN pip install torch==2.1.1+cpu torchvision==0.16.1+cpu torchaudio==2.1.1+cpu \
10
+ -f https://download.pytorch.org/whl/torch_stable.html
11
+
12
  # Set working directory
13
  WORKDIR /app/chatbot
14
 
app.py CHANGED
@@ -5,11 +5,12 @@ import torch
5
  import soundfile as sf
6
  from flask import Flask, request, jsonify, send_from_directory
7
  from flask_cors import CORS
8
- from transformers import AutoModelForCausalLM, AutoTokenizer
9
  from deep_translator import GoogleTranslator
10
  from textblob import TextBlob
11
  import nltk
12
  from parler_tts import ParlerTTSForConditionalGeneration
 
13
 
14
  # Flask setup
15
  dir_path = os.path.dirname(os.path.realpath(__file__))
@@ -32,16 +33,19 @@ except LookupError:
32
  nltk.download('punkt')
33
  nltk.download('punkt_tab')
34
 
 
35
 
36
  class ChatBot:
37
  def __init__(self):
38
  self.chat_history_ids = None
39
  self.bot_input_ids = None
40
  self.device = 'cuda' if torch.cuda.is_available() else 'cpu'
41
- self.tokenizer = AutoTokenizer.from_pretrained("TinyLlama/TinyLlama-1.1B-Chat-v1.0")
42
- self.model = AutoModelForCausalLM.from_pretrained("TinyLlama/TinyLlama-1.1B-Chat-v1.0").to(self.device)
 
43
  # Parler-TTS Setup
44
- self.tts_model = ParlerTTSForConditionalGeneration.from_pretrained("doublesizebed/parler-tts-mini-malay").to(self.device)
 
45
  self.tts_tokenizer = AutoTokenizer.from_pretrained("doublesizebed/parler-tts-mini-malay")
46
  self.description_tokenizer = AutoTokenizer.from_pretrained(self.tts_model.config.text_encoder._name_or_path)
47
 
@@ -71,6 +75,7 @@ class ChatBot:
71
  else:
72
  self.chat_history_ids = torch.cat([self.chat_history_ids, prompt_ids], dim=-1)
73
 
 
74
  output = self.model.generate(
75
  self.chat_history_ids,
76
  max_length=self.chat_history_ids.shape[-1] + 128,
@@ -127,6 +132,7 @@ class ChatBot:
127
  desc_inputs = self.description_tokenizer(description, return_tensors="pt", padding=True).to(self.device)
128
  text_inputs = self.tts_tokenizer(text, return_tensors="pt", padding=True).to(self.device)
129
 
 
130
  generation = self.tts_model.generate(
131
  input_ids=desc_inputs.input_ids,
132
  attention_mask=desc_inputs.attention_mask,
 
5
  import soundfile as sf
6
  from flask import Flask, request, jsonify, send_from_directory
7
  from flask_cors import CORS
8
+ from transformers import AutoModelForCausalLM, AutoTokenizer, BitsAndBytesConfig
9
  from deep_translator import GoogleTranslator
10
  from textblob import TextBlob
11
  import nltk
12
  from parler_tts import ParlerTTSForConditionalGeneration
13
+ from torch.quantization import quantize_dynamic
14
 
15
  # Flask setup
16
  dir_path = os.path.dirname(os.path.realpath(__file__))
 
33
  nltk.download('punkt')
34
  nltk.download('punkt_tab')
35
 
36
+ bnb_config = BitsAndBytesConfig(load_in_8bit=True, llm_int8_enable_fp32_cpu_offload=True)
37
 
38
  class ChatBot:
39
  def __init__(self):
40
  self.chat_history_ids = None
41
  self.bot_input_ids = None
42
  self.device = 'cuda' if torch.cuda.is_available() else 'cpu'
43
+ self.tokenizer = AutoTokenizer.from_pretrained("TinyLlama/TinyLlama-1.1B-Chat-v1.0", use_fast=False)
44
+ self.model = AutoModelForCausalLM.from_pretrained("TinyLlama/TinyLlama-1.1B-Chat-v1.0", quantization_config=bnb_config, device_map="cpu")
45
+ self.model = quantize_dynamic(self.model, {torch.nn.Linear}, dtype=torch.qint8)
46
  # Parler-TTS Setup
47
+ self.tts_model = ParlerTTSForConditionalGeneration.from_pretrained("doublesizebed/parler-tts-mini-malay").to("cpu")
48
+ self.tts_model = quantize_dynamic(self.tts_model, {torch.nn.Linear}, dtype=torch.qint8)
49
  self.tts_tokenizer = AutoTokenizer.from_pretrained("doublesizebed/parler-tts-mini-malay")
50
  self.description_tokenizer = AutoTokenizer.from_pretrained(self.tts_model.config.text_encoder._name_or_path)
51
 
 
75
  else:
76
  self.chat_history_ids = torch.cat([self.chat_history_ids, prompt_ids], dim=-1)
77
 
78
+ self.model.eval()
79
  output = self.model.generate(
80
  self.chat_history_ids,
81
  max_length=self.chat_history_ids.shape[-1] + 128,
 
132
  desc_inputs = self.description_tokenizer(description, return_tensors="pt", padding=True).to(self.device)
133
  text_inputs = self.tts_tokenizer(text, return_tensors="pt", padding=True).to(self.device)
134
 
135
+ self.tts_model.eval()
136
  generation = self.tts_model.generate(
137
  input_ids=desc_inputs.input_ids,
138
  attention_mask=desc_inputs.attention_mask,
requirements.txt CHANGED
@@ -2,7 +2,7 @@ flask
2
  Cython
3
  flask[async]
4
  waitress
5
- torch
6
  transformers
7
  deep-translator
8
  nest_asyncio
@@ -11,4 +11,5 @@ soundfile
11
  textblob
12
  malaya
13
  parler_tts
14
- nltk
 
 
2
  Cython
3
  flask[async]
4
  waitress
5
+ torch==2.1.1+cpu
6
  transformers
7
  deep-translator
8
  nest_asyncio
 
11
  textblob
12
  malaya
13
  parler_tts
14
+ nltk
15
+ bitsandbytes