datbkpro committed on
Commit
759c130
·
verified ·
1 Parent(s): 65be612

Update config/settings.py

Browse files
Files changed (1) hide show
  1. config/settings.py +61 -15
config/settings.py CHANGED
@@ -6,35 +6,81 @@ load_dotenv()
6
  class Settings:
7
  GROQ_API_KEY = os.getenv("GROQ_API_KEY")
8
 
9
- # Multilingual Model Settings - SỬA: Dùng models nhẹ và phổ biến
10
- VIETNAMESE_EMBEDDING_MODEL = 'sentence-transformers/all-MiniLM-L6-v2' # Model nhẹ, hỗ trợ nhiều ngôn ngữ
11
- VIETNAMESE_LLM_MODEL = "llama-3.1-8b-instant" # Model Groq
12
 
13
- MULTILINGUAL_EMBEDDING_MODEL = 'sentence-transformers/all-MiniLM-L6-v2' # Dùng model chung
14
- MULTILINGUAL_LLM_MODEL = "llama-3.1-8b-instant" # Model Groq
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
15
 
16
- # Fallback models
17
- FALLBACK_EMBEDDING_MODEL = 'sentence-transformers/all-MiniLM-L6-v2'
 
 
 
 
 
 
 
 
 
18
 
19
- # Default models
20
- DEFAULT_EMBEDDING_MODEL = 'sentence-transformers/all-MiniLM-L6-v2'
21
- DEFAULT_LLM_MODEL = "llama-3.1-8b-instant"
22
 
23
  WHISPER_MODEL = "whisper-large-v3-turbo"
24
 
25
  # TTS Settings
26
  MAX_CHUNK_LENGTH = 200
27
  SUPPORTED_LANGUAGES = {
28
- 'vi': 'Vietnamese', 'en': 'English', 'fr': 'French', 'es': 'Spanish',
29
- 'de': 'German', 'ja': 'Japanese', 'ko': 'Korean', 'zh': 'Chinese'
30
  }
31
 
32
  # RAG Settings
33
- EMBEDDING_DIMENSION = 384 # Dimension của all-MiniLM-L6-v2
 
 
34
  TOP_K_RESULTS = 3
35
 
36
- # SpeechBrain VAD Settings - TẠM TẮT để tránh lỗi
37
- VAD_MODEL = None # Tạm tắt VAD
38
  VAD_THRESHOLD = 0.5
39
  VAD_MIN_SILENCE_DURATION = 0.5
40
  VAD_SPEECH_PAD_DURATION = 0.1
 
6
  class Settings:
7
  GROQ_API_KEY = os.getenv("GROQ_API_KEY")
8
 
9
+ # Multilingual Model Settings
10
+ VIETNAMESE_EMBEDDING_MODEL = 'dangvantuan/vietnamese-embedding'
11
+ VIETNAMESE_LLM_MODEL = "Vietnamese_LLaMA2_13B_8K_SFT_General_Domain_Knowledge"
12
 
13
+ MULTILINGUAL_EMBEDDING_MODEL = 'Qwen/Qwen3-Embedding-4B'
14
+ MULTILINGUAL_LLM_MODEL = "import os
15
+ from dotenv import load_dotenv
16
+
17
+ load_dotenv()
18
+
19
+ class Settings:
20
+ GROQ_API_KEY = os.getenv("GROQ_API_KEY")
21
+
22
+ # Multilingual Model Settings
23
+ VIETNAMESE_EMBEDDING_MODEL = 'dangvantuan/vietnamese-embedding'
24
+ VIETNAMESE_LLM_MODEL = "llama-3.1-8b-instant"
25
+
26
+ MULTILINGUAL_EMBEDDING_MODEL = 'Qwen/Qwen3-Embedding-0.6B'
27
+ MULTILINGUAL_LLM_MODEL = "llama-3.1-8b-instant"
28
+
29
+ # Fallback models in case primary models fail
30
+ FALLBACK_MULTILINGUAL_EMBEDDING_MODEL = 'sentence-transformers/all-MiniLM-L6-v2'
31
+
32
+ # Default models (fallback)
33
+ DEFAULT_EMBEDDING_MODEL = 'dangvantuan/vietnamese-embedding'
34
+ DEFAULT_LLM_MODEL = "Vietnamese_LLaMA2_13B_8K_SFT_General_Domain_Knowledge"
35
+
36
+ WHISPER_MODEL = "whisper-large-v3-turbo"
37
+
38
+ # TTS Settings
39
+ MAX_CHUNK_LENGTH = 200
40
+ SUPPORTED_LANGUAGES = {
41
+ 'vi': 'vi', 'en': 'en', 'fr': 'fr', 'es': 'es',
42
+ 'de': 'de', 'ja': 'ja', 'ko': 'ko', 'zh': 'zh'
43
+ }
44
+
45
+ # RAG Settings
46
+ EMBEDDING_DIMENSION = 768 # For Vietnamese model
47
+ MULTILINGUAL_EMBEDDING_DIMENSION = 4096 # For Nemotron model
48
+
49
+ TOP_K_RESULTS = 3
50
 
51
+ # SpeechBrain VAD Settings
52
+ VAD_MODEL = "speechbrain/vad-crdnn-libriparty"
53
+ VAD_THRESHOLD = 0.5
54
+ VAD_MIN_SILENCE_DURATION = 0.5
55
+ VAD_SPEECH_PAD_DURATION = 0.1
56
+ SAMPLE_RATE = 16000
57
+
58
+ settings = Settings()"
59
+
60
+ # Fallback models in case primary models fail
61
+ FALLBACK_MULTILINGUAL_EMBEDDING_MODEL = 'sentence-transformers/all-MiniLM-L6-v2'
62
 
63
+ # Default models (fallback)
64
+ DEFAULT_EMBEDDING_MODEL = 'dangvantuan/vietnamese-embedding'
65
+ DEFAULT_LLM_MODEL = "Vietnamese_LLaMA2_13B_8K_SFT_General_Domain_Knowledge"
66
 
67
  WHISPER_MODEL = "whisper-large-v3-turbo"
68
 
69
  # TTS Settings
70
  MAX_CHUNK_LENGTH = 200
71
  SUPPORTED_LANGUAGES = {
72
+ 'vi': 'vi', 'en': 'en', 'fr': 'fr', 'es': 'es',
73
+ 'de': 'de', 'ja': 'ja', 'ko': 'ko', 'zh': 'zh'
74
  }
75
 
76
  # RAG Settings
77
+ EMBEDDING_DIMENSION = 768 # For Vietnamese model
78
+ MULTILINGUAL_EMBEDDING_DIMENSION = 4096 # For Nemotron model
79
+
80
  TOP_K_RESULTS = 3
81
 
82
+ # SpeechBrain VAD Settings
83
+ VAD_MODEL = "speechbrain/vad-crdnn-libriparty"
84
  VAD_THRESHOLD = 0.5
85
  VAD_MIN_SILENCE_DURATION = 0.5
86
  VAD_SPEECH_PAD_DURATION = 0.1