doublesizebed committed on
Commit
dc0e2d8
·
1 Parent(s): be38dd6
Files changed (3) hide show
  1. Dockerfile +5 -6
  2. app.py +7 -3
  3. requirements.txt +1 -0
Dockerfile CHANGED
@@ -23,18 +23,17 @@ RUN pip install --no-cache-dir Cython
23
  COPY requirements.txt .
24
  RUN pip install --no-cache-dir -r requirements.txt
25
 
26
- # Create writable directories
27
- RUN mkdir -p /data/hf_cache /data/transformers_cache /data/nltk_data /data/audio /static/audio && \
28
- chmod -R 777 /data /static && \
29
- chmod -R 777 /root/.cache
30
-
31
-
32
  # Set environment variables
33
  ENV HF_HOME=/data/hf_cache
34
  ENV TRANSFORMERS_CACHE=/data/transformers_cache
35
  ENV NLTK_DATA=/data/nltk_data
36
  ENV AUDIO_FOLDER=/data/audio
37
 
 
 
 
 
 
38
  # Copy application code
39
  COPY . .
40
 
 
23
  COPY requirements.txt .
24
  RUN pip install --no-cache-dir -r requirements.txt
25
 
 
 
 
 
 
 
26
  # Set environment variables
27
  ENV HF_HOME=/data/hf_cache
28
  ENV TRANSFORMERS_CACHE=/data/transformers_cache
29
  ENV NLTK_DATA=/data/nltk_data
30
  ENV AUDIO_FOLDER=/data/audio
31
 
32
+ # Create writable directories
33
+ RUN mkdir -p /data/hf_cache /data/transformers_cache /data/nltk_data /data/audio /static/audio && \
34
+ chmod -R 777 /data /static && \
35
+ chmod -R 777 /root/.cache
36
+
37
  # Copy application code
38
  COPY . .
39
 
app.py CHANGED
@@ -16,6 +16,7 @@ from torch.quantization import quantize_dynamic
16
  dir_path = os.path.dirname(os.path.realpath(__file__))
17
  app = Flask(__name__, static_folder="static", static_url_path="")
18
  CORS(app)
 
19
 
20
  # Paths
21
  AUDIO_FOLDER = '/static/audio'
@@ -33,7 +34,9 @@ except LookupError:
33
  nltk.download('punkt')
34
  nltk.download('punkt_tab')
35
 
36
- bnb_config = BitsAndBytesConfig(load_in_8bit=True, llm_int8_enable_fp32_cpu_offload=True)
 
 
37
 
38
  class ChatBot:
39
  def __init__(self):
@@ -42,7 +45,8 @@ class ChatBot:
42
  self.device = 'cuda' if torch.cuda.is_available() else 'cpu'
43
  self.tokenizer = AutoTokenizer.from_pretrained("TinyLlama/TinyLlama-1.1B-Chat-v1.0", use_fast=False)
44
  self.model = AutoModelForCausalLM.from_pretrained("TinyLlama/TinyLlama-1.1B-Chat-v1.0")
45
- self.model = torch.ao.quantization.quantize_dynamic(self.model, {torch.nn.Linear}, dtype=torch.qint8)
 
46
  # Parler-TTS Setup
47
  self.tts_model = ParlerTTSForConditionalGeneration.from_pretrained("doublesizebed/parler-tts-mini-malay")
48
  self.tts_model = torch.ao.quantization.quantize_dynamic(self.tts_model, {torch.nn.Linear}, dtype=torch.qint8)
@@ -158,7 +162,7 @@ def chat_endpoint():
158
  gender = data.get('gender', '')
159
  if not user_text:
160
  return jsonify({"error": "Empty message"}), 400
161
- resp_text, wav_name = asyncio.run(chatbot.get_response(user_text, gender))
162
  url = f"audio/{wav_name}"
163
  return jsonify({"response": resp_text, "audiofile": url})
164
 
 
16
  dir_path = os.path.dirname(os.path.realpath(__file__))
17
  app = Flask(__name__, static_folder="static", static_url_path="")
18
  CORS(app)
19
+ torch.set_num_threads(2)
20
 
21
  # Paths
22
  AUDIO_FOLDER = '/static/audio'
 
34
  nltk.download('punkt')
35
  nltk.download('punkt_tab')
36
 
37
+ AutoTokenizer.from_pretrained("TinyLlama/TinyLlama-1.1B-Chat-v1.0", cache_dir=os.getenv("TRANSFORMERS_CACHE"))
38
+ AutoModelForCausalLM.from_pretrained("TinyLlama/TinyLlama-1.1B-Chat-v1.0", cache_dir=os.getenv("TRANSFORMERS_CACHE"))
39
+ ParlerTTSForConditionalGeneration.from_pretrained("doublesizebed/parler-tts-mini-malay", cache_dir=os.getenv("TRANSFORMERS_CACHE"))
40
 
41
  class ChatBot:
42
  def __init__(self):
 
45
  self.device = 'cuda' if torch.cuda.is_available() else 'cpu'
46
  self.tokenizer = AutoTokenizer.from_pretrained("TinyLlama/TinyLlama-1.1B-Chat-v1.0", use_fast=False)
47
  self.model = AutoModelForCausalLM.from_pretrained("TinyLlama/TinyLlama-1.1B-Chat-v1.0")
48
+ self.model = torch.quantization.quantize_dynamic(self.model, {torch.nn.Linear}, dtype=torch.qint8)
49
+
50
  # Parler-TTS Setup
51
  self.tts_model = ParlerTTSForConditionalGeneration.from_pretrained("doublesizebed/parler-tts-mini-malay")
52
  self.tts_model = torch.ao.quantization.quantize_dynamic(self.tts_model, {torch.nn.Linear}, dtype=torch.qint8)
 
162
  gender = data.get('gender', '')
163
  if not user_text:
164
  return jsonify({"error": "Empty message"}), 400
165
+ resp_text, wav_name = asyncio.get_event_loop().run_until_complete(chatbot.get_response(user_text, gender))
166
  url = f"audio/{wav_name}"
167
  return jsonify({"response": resp_text, "audiofile": url})
168
 
requirements.txt CHANGED
@@ -11,6 +11,7 @@ soundfile
11
  textblob
12
  malaya
13
  parler_tts
 
14
  nltk
15
  bitsandbytes==0.45.5
16
  accelerate
 
11
  textblob
12
  malaya
13
  parler_tts
14
+ numpy<2
15
  nltk
16
  bitsandbytes==0.45.5
17
  accelerate