doublesizebed committed on
Commit
dc0e2d8
·
1 Parent(s): be38dd6
Files changed (3) hide show
  1. Dockerfile +5 -6
  2. app.py +7 -3
  3. requirements.txt +1 -0
Dockerfile CHANGED
@@ -23,18 +23,17 @@ RUN pip install --no-cache-dir Cython
23
  COPY requirements.txt .
24
  RUN pip install --no-cache-dir -r requirements.txt
25
 
26
- # Create writable directories
27
- RUN mkdir -p /data/hf_cache /data/transformers_cache /data/nltk_data /data/audio /static/audio && \
28
- chmod -R 777 /data /static && \
29
- chmod -R 777 /root/.cache
30
-
31
-
32
  # Set environment variables
33
  ENV HF_HOME=/data/hf_cache
34
  ENV TRANSFORMERS_CACHE=/data/transformers_cache
35
  ENV NLTK_DATA=/data/nltk_data
36
  ENV AUDIO_FOLDER=/data/audio
37
 
 
 
 
 
 
38
  # Copy application code
39
  COPY . .
40
 
 
23
  COPY requirements.txt .
24
  RUN pip install --no-cache-dir -r requirements.txt
25
 
 
 
 
 
 
 
26
  # Set environment variables
27
  ENV HF_HOME=/data/hf_cache
28
  ENV TRANSFORMERS_CACHE=/data/transformers_cache
29
  ENV NLTK_DATA=/data/nltk_data
30
  ENV AUDIO_FOLDER=/data/audio
31
 
32
+ # Create writable directories
33
+ RUN mkdir -p /data/hf_cache /data/transformers_cache /data/nltk_data /data/audio /static/audio && \
34
+ chmod -R 777 /data /static && \
35
+ chmod -R 777 /root/.cache
36
+
37
  # Copy application code
38
  COPY . .
39
 
app.py CHANGED
@@ -16,6 +16,7 @@ from torch.quantization import quantize_dynamic
16
  dir_path = os.path.dirname(os.path.realpath(__file__))
17
  app = Flask(__name__, static_folder="static", static_url_path="")
18
  CORS(app)
 
19
 
20
  # Paths
21
  AUDIO_FOLDER = '/static/audio'
@@ -33,7 +34,9 @@ except LookupError:
33
  nltk.download('punkt')
34
  nltk.download('punkt_tab')
35
 
36
- bnb_config = BitsAndBytesConfig(load_in_8bit=True, llm_int8_enable_fp32_cpu_offload=True)
 
 
37
 
38
  class ChatBot:
39
  def __init__(self):
@@ -42,7 +45,8 @@ class ChatBot:
42
  self.device = 'cuda' if torch.cuda.is_available() else 'cpu'
43
  self.tokenizer = AutoTokenizer.from_pretrained("TinyLlama/TinyLlama-1.1B-Chat-v1.0", use_fast=False)
44
  self.model = AutoModelForCausalLM.from_pretrained("TinyLlama/TinyLlama-1.1B-Chat-v1.0")
45
- self.model = torch.ao.quantization.quantize_dynamic(self.model, {torch.nn.Linear}, dtype=torch.qint8)
 
46
  # Parler-TTS Setup
47
  self.tts_model = ParlerTTSForConditionalGeneration.from_pretrained("doublesizebed/parler-tts-mini-malay")
48
  self.tts_model = torch.ao.quantization.quantize_dynamic(self.tts_model, {torch.nn.Linear}, dtype=torch.qint8)
@@ -158,7 +162,7 @@ def chat_endpoint():
158
  gender = data.get('gender', '')
159
  if not user_text:
160
  return jsonify({"error": "Empty message"}), 400
161
- resp_text, wav_name = asyncio.run(chatbot.get_response(user_text, gender))
162
  url = f"audio/{wav_name}"
163
  return jsonify({"response": resp_text, "audiofile": url})
164
 
 
16
  dir_path = os.path.dirname(os.path.realpath(__file__))
17
  app = Flask(__name__, static_folder="static", static_url_path="")
18
  CORS(app)
19
+ torch.set_num_threads(2)
20
 
21
  # Paths
22
  AUDIO_FOLDER = '/static/audio'
 
34
  nltk.download('punkt')
35
  nltk.download('punkt_tab')
36
 
37
+ AutoTokenizer.from_pretrained("TinyLlama/TinyLlama-1.1B-Chat-v1.0", cache_dir=os.getenv("TRANSFORMERS_CACHE"))
38
+ AutoModelForCausalLM.from_pretrained("TinyLlama/TinyLlama-1.1B-Chat-v1.0", cache_dir=os.getenv("TRANSFORMERS_CACHE"))
39
+ ParlerTTSForConditionalGeneration.from_pretrained("doublesizebed/parler-tts-mini-malay", cache_dir=os.getenv("TRANSFORMERS_CACHE"))
40
 
41
  class ChatBot:
42
  def __init__(self):
 
45
  self.device = 'cuda' if torch.cuda.is_available() else 'cpu'
46
  self.tokenizer = AutoTokenizer.from_pretrained("TinyLlama/TinyLlama-1.1B-Chat-v1.0", use_fast=False)
47
  self.model = AutoModelForCausalLM.from_pretrained("TinyLlama/TinyLlama-1.1B-Chat-v1.0")
48
+ self.model = torch.quantization.quantize_dynamic(self.model, {torch.nn.Linear}, dtype=torch.qint8)
49
+
50
  # Parler-TTS Setup
51
  self.tts_model = ParlerTTSForConditionalGeneration.from_pretrained("doublesizebed/parler-tts-mini-malay")
52
  self.tts_model = torch.ao.quantization.quantize_dynamic(self.tts_model, {torch.nn.Linear}, dtype=torch.qint8)
 
162
  gender = data.get('gender', '')
163
  if not user_text:
164
  return jsonify({"error": "Empty message"}), 400
165
+ resp_text, wav_name = asyncio.get_event_loop().run_until_complete(chatbot.get_response(user_text, gender))
166
  url = f"audio/{wav_name}"
167
  return jsonify({"response": resp_text, "audiofile": url})
168
 
requirements.txt CHANGED
@@ -11,6 +11,7 @@ soundfile
11
  textblob
12
  malaya
13
  parler_tts
 
14
  nltk
15
  bitsandbytes==0.45.5
16
  accelerate
 
11
  textblob
12
  malaya
13
  parler_tts
14
+ numpy<2
15
  nltk
16
  bitsandbytes==0.45.5
17
  accelerate