doublesizebed committed on
Commit
a053ac4
·
1 Parent(s): 556ba3d

Initial Docker-based Space

Browse files
Files changed (3) hide show
  1. Dockerfile +14 -0
  2. app.py +102 -0
  3. requirement.txt +13 -0
Dockerfile ADDED
@@ -0,0 +1,14 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
FROM python:3.9-slim

# System deps: ffmpeg for audio handling, git for pip installs from VCS.
RUN apt-get update && apt-get install -y ffmpeg git && rm -rf /var/lib/apt/lists/*

WORKDIR /app
# The repo's dependency file is named "requirement.txt" (singular) — the
# original COPY referenced "requirements.txt", which does not exist in this
# commit and would fail the build at this step.
COPY requirement.txt .
RUN pip install --no-cache-dir -r requirement.txt

# Copy source
COPY . .

# 7860 is the conventional Hugging Face Spaces port; app.py binds to it.
EXPOSE 7860
CMD ["python", "app.py"]
app.py ADDED
@@ -0,0 +1,102 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
import asyncio
import os
import re
import string

import fasttext
import nltk
import soundfile as sf
import torch
from deep_translator import GoogleTranslator
from flask import Flask, request, jsonify, send_from_directory
from flask_cors import CORS
from g2p import make_g2p
from huggingface_hub import hf_hub_download
from parler_tts import ParlerTTSForConditionalGeneration
from textblob import TextBlob
from transformers import AutoModelForCausalLM, AutoModelForSeq2SeqLM, AutoTokenizer
17
+
18
# Flask setup: serve static files (generated audio) from ./static and allow
# cross-origin requests so a separately hosted frontend can call the API.
dir_path = os.path.dirname(os.path.realpath(__file__))
app = Flask(__name__, static_folder="static")
CORS(app)

# Paths: generated WAV files are written here and served by /static/audio/.
AUDIO_FOLDER = os.path.join(dir_path, 'static', 'audio')
os.makedirs(AUDIO_FOLDER, exist_ok=True)

# Load language-identification model (fastText binary fetched from the Hub).
# Labels are presumably "__label__ms" / "__label__en" — see detect_lang().
lid_model = fasttext.load_model(
    hf_hub_download("doublesizebed/predict_malay_en", "lid_ms_en.bin")
)
31
+
32
def tokenize(text):
    """Lowercase *text*, split on whitespace, and strip surrounding punctuation.

    Tokens that are empty after stripping (i.e. pure punctuation) are dropped.
    Interior punctuation (apostrophes, hyphens) is preserved.
    """
    stripped = (word.strip(string.punctuation) for word in text.lower().split())
    return [word for word in stripped if word]
35
+
36
def detect_lang(token):
    """Classify *token* with the fastText LID model and return an upper-case
    language code (e.g. "MS" or "EN") with the "__label__" prefix removed."""
    labels, _scores = lid_model.predict(token)
    best = labels[0]
    return best.replace("__label__", "").upper()
39
+
40
# G2P (grapheme-to-phoneme) models:
# - Malay: a Hugging Face seq2seq model fine-tuned for G2P.
# - English: the rule-based `g2p` transducer mapping English spelling to IPA.
g2p_ms_tokenizer = AutoTokenizer.from_pretrained("doublesizebed/G2P_malay")
g2p_ms_model = AutoModelForSeq2SeqLM.from_pretrained("doublesizebed/G2P_malay").to('cuda' if torch.cuda.is_available() else 'cpu')
g2p_eng = make_g2p("eng", "eng-ipa")
44
+
45
def predict_phonemes(word, lang):
    """Return the phoneme string for *word*.

    Malay words (``lang == "MS"``) go through the seq2seq HF model; any other
    language code falls back to the rule-based English IPA transducer.
    """
    if lang != "MS":
        transduced = g2p_eng(word)
        return ' '.join(transduced.to_sequence())
    encoded = g2p_ms_tokenizer(word, return_tensors="pt", padding=True, truncation=True)
    encoded = encoded.to(g2p_ms_model.device)
    generated = g2p_ms_model.generate(**encoded)
    return g2p_ms_tokenizer.decode(generated[0], skip_special_tokens=True)
54
+
55
+ # Chatbot setup
56
class ChatBot:
    """Conversation pipeline: TinyLlama chat model for text, Parler-TTS for audio.

    NOTE(review): in the scraped source, two statements had been fused into
    comment lines (``# Load conversation model\\ self.tokenizer = ...`` and
    ``# NLTK\\ nltk.download('brown')``), which left ``self.tokenizer`` unset
    and the brown corpus never downloaded. They are restored as real
    statements here.
    """

    def __init__(self):
        self.device = 'cuda' if torch.cuda.is_available() else 'cpu'
        # Load conversation model
        self.tokenizer = AutoTokenizer.from_pretrained("TinyLlama/TinyLlama-1.1B-Chat-v1.0")
        self.model = AutoModelForCausalLM.from_pretrained(
            "TinyLlama/TinyLlama-1.1B-Chat-v1.0"
        ).to(self.device)
        self.chat_history = None
        # Parler TTS (Malay fine-tune); both tokenizers are built from the
        # TTS model's text-encoder checkpoint, matching the original code.
        self.tts_model = ParlerTTSForConditionalGeneration.from_pretrained(
            "doublesizebed/parler-tts-mini-malay"
        ).to(self.device)
        self.tts_text_tokenizer = AutoTokenizer.from_pretrained(
            self.tts_model.config.text_encoder._name_or_path
        )
        self.tts_desc_tokenizer = AutoTokenizer.from_pretrained(
            self.tts_model.config.text_encoder._name_or_path
        )
        # NLTK corpora/taggers (used by TextBlob noun processing)
        nltk.download('brown')
        nltk.download('punkt')
        nltk.download('averaged_perceptron_tagger')

    async def chat(self, user_input, gender):
        """Run one chat turn and return ``(response_text, wav_filename)``.

        Placeholder body (as in the original commit): the real pipeline —
        prompt building, generation, translation/noun masking, TTS, saving a
        WAV under static/audio — is stubbed out and constants are returned.
        """
        # Build prompt ... (same as original)
        # Generate response
        # Translate & mask nouns
        # TTS generation...
        # Save WAV in static/audio and return filename
        return "Translated text", "response.wav"
83
+
84
# Instantiate once at import time so all models are loaded before the first request.
chatbot = ChatBot()
85
+
86
@app.route('/chat', methods=['POST'])
def chat_endpoint():
    """POST /chat — run one chatbot turn.

    Expects JSON ``{"message": str, "gender": str}``; returns the response
    text and the URL of the generated audio file, or 400 on an empty message.
    """
    payload = request.get_json()
    message = payload.get('message', '')
    voice_gender = payload.get('gender', 'male')
    if not message:
        return jsonify({"error": "Empty message"}), 400
    reply_text, wav_filename = asyncio.run(chatbot.chat(message, voice_gender))
    return jsonify({
        "response": reply_text,
        "audiofile": f"/static/audio/{wav_filename}",
    })
96
+
97
# GET /static/audio/<filename> — serve a generated WAV from AUDIO_FOLDER.
# send_from_directory guards against path traversal outside the folder.
@app.route('/static/audio/<path:filename>')
def serve_audio(filename):
    return send_from_directory(AUDIO_FOLDER, filename)
100
+
101
# Bind on all interfaces; port 7860 matches the Dockerfile's EXPOSE.
if __name__ == '__main__':
    app.run(host='0.0.0.0', port=7860)
requirement.txt ADDED
@@ -0,0 +1,13 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
flask
flask-cors
nest_asyncio
transformers>=4.30
torch
fasttext
deep-translator
textblob
parler-tts
soundfile
nltk
# app.py does `from g2p import make_g2p` (the `g2p` package's API);
# the previously listed `g2p-en` package installs a different module
# (`g2p_en`) and would leave that import failing.
g2p
huggingface-hub