Spaces:
Paused
Paused
dev-patch-1
#1
by
ImPolymath
- opened
- .gitignore +0 -5
- README.md +6 -42
- app.py +672 -52
- core/DetectLanguage.py +0 -153
- core/__init__.py +0 -0
- core/audio_files.py +0 -142
- core/audio_isolation.py +0 -66
- core/clients_connection_check.py +0 -11
- core/converter.py +0 -99
- core/core.py +0 -108
- core/demorrha.py +0 -216
- core/files.py +0 -53
- core/moderation.py +0 -60
- core/sound_generation.py +0 -1
- core/speech_to_text.py +0 -255
- core/text_to_speech.py +0 -162
- pages/configuration_ui_lang.py +0 -128
- pages/main.py +0 -675
- requirements.txt +5 -4
- scripts/gen_ui_lang_support.py +1 -23
- scripts/list_and_delete_assistants.py +0 -31
- scripts/ui_lang_updater.py +0 -177
- ui_lang_support.json +0 -0
- var_app.py +0 -57
.gitignore
CHANGED
|
@@ -94,8 +94,3 @@ ENV/
|
|
| 94 |
# Données sensibles ou de configuration locale
|
| 95 |
credentials.json
|
| 96 |
config.ini
|
| 97 |
-
|
| 98 |
-
test_samples/
|
| 99 |
-
test-samples/
|
| 100 |
-
|
| 101 |
-
.python-version
|
|
|
|
| 94 |
# Données sensibles ou de configuration locale
|
| 95 |
credentials.json
|
| 96 |
config.ini
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
README.md
CHANGED
|
@@ -1,49 +1,13 @@
|
|
| 1 |
---
|
| 2 |
-
title: Demorrha
|
| 3 |
-
emoji:
|
| 4 |
-
colorFrom:
|
| 5 |
-
colorTo:
|
| 6 |
-
python_version: 3.10.15
|
| 7 |
sdk: streamlit
|
| 8 |
-
sdk_version: 1.
|
| 9 |
app_file: app.py
|
| 10 |
-
pinned:
|
| 11 |
license: gpl-3.0
|
| 12 |
-
thumbnail: >-
|
| 13 |
-
https://cdn-uploads.huggingface.co/production/uploads/6616ef2ccba578190b887876/RSTUMUn6KegraYqq66pbG.jpeg
|
| 14 |
-
short_description: 'Quest: "Break down the language barriers between people..."'
|
| 15 |
---
|
| 16 |
|
| 17 |
Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference
|
| 18 |
-
|
| 19 |
-
|
| 20 |
-
# Demorrha v1
|
| 21 |
-
> Quest: "Break down the language barriers between people..."
|
| 22 |
-
|
| 23 |
-
Demorrha is an AI-powered translation assistant that helps facilitate communication across different languages. It provides real-time translation capabilities through text, audio, and file inputs.
|
| 24 |
-
|
| 25 |
-
## Key Features
|
| 26 |
-
- Multi-language support with up to 4 target languages simultaneously
|
| 27 |
-
- Text-to-Speech and Speech-to-Text capabilities
|
| 28 |
-
- File upload support for text documents
|
| 29 |
-
- User-friendly interface with customizable settings
|
| 30 |
-
- Real-time streaming translations
|
| 31 |
-
|
| 32 |
-
## Getting Started
|
| 33 |
-
Visit the app at [Demorrha on Hugging Face](https://huggingface.co/spaces/ImPolymath/demorrha)
|
| 34 |
-
|
| 35 |
-
## Usage
|
| 36 |
-
1. Select your interface language
|
| 37 |
-
2. Choose up to 4 target languages for translation
|
| 38 |
-
3. Input text, upload a file, or use voice input
|
| 39 |
-
4. Get real-time translations in your selected languages
|
| 40 |
-
|
| 41 |
-
## Technical Details
|
| 42 |
-
- Built with Streamlit
|
| 43 |
-
- Uses OpenAI's GPT models for translation
|
| 44 |
-
- Integrates Azure Speech Services for voice capabilities
|
| 45 |
-
- Supports multiple file formats for text extraction
|
| 46 |
-
|
| 47 |
-
## Contributing
|
| 48 |
-
Contributions are welcome! Please feel free to submit a Pull Request.
|
| 49 |
-
|
|
|
|
| 1 |
---
|
| 2 |
+
title: Demorrha
|
| 3 |
+
emoji: ๐จ
|
| 4 |
+
colorFrom: yellow
|
| 5 |
+
colorTo: blue
|
|
|
|
| 6 |
sdk: streamlit
|
| 7 |
+
sdk_version: 1.38.0
|
| 8 |
app_file: app.py
|
| 9 |
+
pinned: false
|
| 10 |
license: gpl-3.0
|
|
|
|
|
|
|
|
|
|
| 11 |
---
|
| 12 |
|
| 13 |
Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
app.py
CHANGED
|
@@ -16,35 +16,22 @@ from typing import Tuple
|
|
| 16 |
from typing import Union
|
| 17 |
|
| 18 |
# Third-party libraries
|
|
|
|
| 19 |
import streamlit as st
|
|
|
|
|
|
|
|
|
|
| 20 |
|
| 21 |
|
| 22 |
-
|
| 23 |
|
| 24 |
# Au début du fichier, après les imports
|
| 25 |
-
|
| 26 |
-
|
| 27 |
-
|
| 28 |
-
|
| 29 |
-
|
| 30 |
-
|
| 31 |
-
|
| 32 |
-
|
| 33 |
-
LANGUAGES_EMOJI = {
|
| 34 |
-
"Afrikaans": "๐ฟ๐ฆ", "Arabic": "๐ธ๐ฆ", "Armenian": "๐ฆ๐ฒ", "Azerbaijani": "๐ฆ๐ฟ", "Belarusian": "๐ง๐พ",
|
| 35 |
-
"Bosnian": "๐ง๐ฆ", "Bulgarian": "๐ง๐ฌ", "Catalan": "๐ช๐ธ", "Chinese": "๐จ๐ณ", "Croatian": "๐ญ๐ท",
|
| 36 |
-
"Czech": "๐จ๐ฟ", "Danish": "๐ฉ๐ฐ", "Dutch": "๐ณ๐ฑ", "English": "๐ฌ๐ง", "Estonian": "๐ช๐ช",
|
| 37 |
-
"Finnish": "๐ซ๐ฎ", "French": "๐ซ๐ท", "Galician": "๐ช๐ธ", "German": "๐ฉ๐ช", "Greek": "๐ฌ๐ท",
|
| 38 |
-
"Hebrew": "๐ฎ๐ฑ", "Hindi": "๐ฎ๐ณ", "Hungarian": "๐ญ๐บ", "Icelandic": "๐ฎ๐ธ", "Indonesian": "๐ฎ๐ฉ",
|
| 39 |
-
"Italian": "๐ฎ๐น", "Japanese": "๐ฏ๐ต", "Kannada": "๐ฎ๐ณ", "Kazakh": "๐ฐ๐ฟ", "Korean": "๐ฐ๐ท",
|
| 40 |
-
"Latvian": "๐ฑ๐ป", "Lithuanian": "๐ฑ๐น", "Macedonian": "๐ฒ๐ฐ", "Malay": "๐ฒ๐พ", "Marathi": "๐ฎ๐ณ",
|
| 41 |
-
"Maori": "๐ณ๐ฟ", "Nepali": "๐ณ๐ต", "Norwegian": "๐ณ๐ด", "Persian": "๐ฎ๐ท", "Polish": "๐ต๐ฑ",
|
| 42 |
-
"Portuguese": "๐ต๐น", "Romanian": "๐ท๐ด", "Russian": "๐ท๐บ", "Serbian": "๐ท๐ธ", "Slovak": "๐ธ๐ฐ",
|
| 43 |
-
"Slovenian": "๐ธ๐ฎ", "Spanish": "๐ช๐ธ", "Swahili": "๐ฐ๐ช", "Swedish": "๐ธ๐ช", "Tagalog": "๐ต๐ญ",
|
| 44 |
-
"Tamil": "๐ฎ๐ณ", "Thai": "๐น๐ญ", "Turkish": "๐น๐ท", "Ukrainian": "๐บ๐ฆ", "Urdu": "๐ต๐ฐ",
|
| 45 |
-
"Vietnamese": "๐ป๐ณ", "Welsh": "๐ด๓ ง๓ ข๓ ท๓ ฌ๓ ณ๓ ฟ"
|
| 46 |
-
}
|
| 47 |
-
|
| 48 |
|
| 49 |
def load_ui_language(file_path: Optional[str] = "ui_lang_support.json") -> Dict[str, Any]:
|
| 50 |
"""
|
|
@@ -69,48 +56,681 @@ def load_ui_language(file_path: Optional[str] = "ui_lang_support.json") -> Dict[
|
|
| 69 |
print(f"{get_translation('erreur_lecture_fichier')} {e}")
|
| 70 |
return {}
|
| 71 |
|
|
|
|
|
|
|
| 72 |
|
| 73 |
def get_translation(key: str) -> str:
|
| 74 |
"""
|
| 75 |
Obtient la traduction pour une clรฉ donnรฉe basรฉe sur la langue d'interface sรฉlectionnรฉe.
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 76 |
"""
|
| 77 |
-
|
| 78 |
-
return translations.get(lang, {}).get(key, key)
|
| 79 |
|
| 80 |
-
#
|
| 81 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 82 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 83 |
|
|
|
|
|
|
|
|
|
|
| 84 |
|
|
|
|
|
|
|
|
|
|
| 85 |
|
| 86 |
-
|
| 87 |
-
|
| 88 |
-
|
| 89 |
-
|
| 90 |
-
|
| 91 |
-
|
| 92 |
-
|
| 93 |
-
|
| 94 |
-
|
| 95 |
-
|
| 96 |
-
|
| 97 |
-
|
| 98 |
-
|
| 99 |
-
|
| 100 |
-
|
| 101 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 102 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 103 |
|
| 104 |
-
|
| 105 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 106 |
|
| 107 |
-
#
|
| 108 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 109 |
|
| 110 |
-
|
| 111 |
-
|
| 112 |
-
|
| 113 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 114 |
|
| 115 |
# Point d'entrée de l'application
|
| 116 |
if __name__ == "__main__":
|
|
|
|
| 16 |
from typing import Union
|
| 17 |
|
| 18 |
# Third-party libraries
|
| 19 |
+
import requests
|
| 20 |
import streamlit as st
|
| 21 |
+
from audiorecorder import audiorecorder
|
| 22 |
+
from openai import OpenAI
|
| 23 |
+
from pydub import AudioSegment
|
| 24 |
|
| 25 |
|
| 26 |
+
__version__ = "1.1.4"
|
| 27 |
|
| 28 |
# Au début du fichier, après les imports
|
| 29 |
+
st.set_page_config(
|
| 30 |
+
page_title=f"DEMORRHA - (v{__version__})",
|
| 31 |
+
page_icon="๐น",
|
| 32 |
+
layout="wide",
|
| 33 |
+
initial_sidebar_state="collapsed"
|
| 34 |
+
)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 35 |
|
| 36 |
def load_ui_language(file_path: Optional[str] = "ui_lang_support.json") -> Dict[str, Any]:
|
| 37 |
"""
|
|
|
|
| 56 |
print(f"{get_translation('erreur_lecture_fichier')} {e}")
|
| 57 |
return {}
|
| 58 |
|
| 59 |
+
# Dictionary to store translations
|
| 60 |
+
translations = load_ui_language()
|
| 61 |
|
| 62 |
def get_translation(key: str) -> str:
    """
    Return the UI text for ``key`` in the selected interface language.

    Args:
        key (str): The translation key to look up.

    Returns:
        str: The translated text, or ``key`` itself when either the interface
            language or the key is absent from the loaded translations.
    """
    # This helper is called from error handlers, and the translations table
    # may be empty after a failed load, so a missing entry must degrade to
    # the raw key instead of raising KeyError/AttributeError.
    language = st.session_state.get("interface_language")
    return translations.get(language, {}).get(key, key)
|
|
|
|
| 73 |
|
| 74 |
+
# OpenAI client configuration with API key
|
| 75 |
+
client = OpenAI(api_key=getenv("OPENAI_API_KEY"))
|
| 76 |
+
|
| 77 |
+
def read_file(file_name: str) -> str:
    """
    Read a UTF-8 text file and return its content.

    Failures are reported through the return value rather than raised: the
    caller receives a translated error message instead of the file content.

    Args:
        file_name (str): Path of the file to read.

    Returns:
        str: The file content, or an error message if the file is missing,
            unreadable, or not valid UTF-8.
    """
    try:
        with open(file_name, "r", encoding="utf-8") as file:
            return file.read()
    except FileNotFoundError:
        return f"{get_translation('erreur_fichier_non_trouve')} {file_name}"
    except (OSError, UnicodeDecodeError) as e:
        # UnicodeDecodeError is not an OSError; without it a file in another
        # encoding would escape as an exception.
        return f"{get_translation('erreur_lecture_fichier')} {e}"
|
| 95 |
|
| 96 |
+
def _discard_segments(paths: List[str]) -> None:
    """Best-effort removal of temporary segment files."""
    for path in paths:
        try:
            os.unlink(path)
        except OSError:
            pass


def split_audio(audio_file: str, max_size_mb: int = 25) -> List[str]:
    """
    Split a WAV file into segments of at most ``max_size_mb`` megabytes.

    Each segment is exported to its own temporary ``.wav`` file; the caller
    is responsible for deleting the returned files.

    Args:
        audio_file (str): Path to the source WAV file.
        max_size_mb (int): Maximum size of each segment, in megabytes.

    Returns:
        List[str]: Paths of the exported segments, in playback order, or an
            empty list if the audio could not be read, split or written.
    """
    segments: List[str] = []
    try:
        audio = AudioSegment.from_wav(audio_file)
        duration_ms = len(audio)  # pydub lengths are in milliseconds

        # sample_width is in bytes, so this is the uncompressed byte rate.
        bytes_per_second = audio.frame_rate * audio.sample_width * audio.channels
        # Keep a little headroom for the WAV container header so an exported
        # segment stays under the limit.
        usable_bytes = max_size_mb * 1024 * 1024 - 1024
        # bytes / (bytes per second) gives seconds; convert to milliseconds.
        segment_duration_ms = int(usable_bytes * 1000 / bytes_per_second)
        if segment_duration_ms <= 0:
            raise ValueError(
                f"max_size_mb={max_size_mb} is too small for this audio format"
            )

        for start in range(0, duration_ms, segment_duration_ms):
            end = min(start + segment_duration_ms, duration_ms)
            segment = audio[start:end]
            # Reserve a unique path, then export after the handle is closed so
            # the file is not opened twice.
            with tempfile.NamedTemporaryFile(delete=False, suffix=".wav") as temp_segment:
                segment_path = temp_segment.name
            segments.append(segment_path)
            segment.export(segment_path, format="wav")

        return segments
    except IOError as e:
        print(f"Erreur lors de la lecture ou de l'écriture du fichier audio : {e}")
        _discard_segments(segments)
        return []
    except ValueError as e:
        print(f"Erreur de valeur lors du traitement de l'audio : {e}")
        _discard_segments(segments)
        return []
|
| 130 |
+
|
| 131 |
+
# Fonction modifiée pour transcrire l'audio en texte
|
| 132 |
+
def transcribe_audio(audio_file: IO, language: Optional[str] = None) -> str:
    """
    Transcribe an audio file to text with the ``whisper-1`` model.

    Files larger than 25 MB are first split with ``split_audio`` and the
    segments are transcribed one after another.

    Args:
        audio_file (IO): The audio file to transcribe; only its ``name``
            attribute (a path on disk) is used.
        language (Optional[str]): Language of the audio. Defaults to None.

    Returns:
        str: The transcribed text, or an empty string if an I/O or API error
            occurred (the error is shown with ``st.error``).
    """
    # The exception classes live on the ``openai`` module, not on the client
    # instance, so ``except client.APIError`` would fail when evaluated.
    from openai import APIError

    max_size_mb = 25
    source_path = audio_file.name
    # Omit the parameter when no language is given instead of sending None.
    # NOTE(review): confirm how the installed SDK treats ``language=None``.
    language_args = {"language": language} if language else {}

    try:
        file_size_mb = os.path.getsize(source_path) / (1024 * 1024)

        with st.status("Transcription de l'audio en cours...") as status:
            if file_size_mb > max_size_mb:
                status.update(label="Découpage de l'audio en segments...")
                segments = split_audio(source_path, max_size_mb)
                texts = []
                try:
                    for i, segment in enumerate(segments):
                        status.update(
                            label=f"Transcription du segment {i+1}/{len(segments)}..."
                        )
                        with open(segment, "rb") as audio_segment:
                            transcript = client.audio.transcriptions.create(
                                model="whisper-1",
                                file=audio_segment,
                                **language_args
                            )
                        texts.append(transcript.text)
                finally:
                    # Remove every temporary segment, even if a call failed.
                    for segment in segments:
                        try:
                            os.unlink(segment)
                        except OSError:
                            pass
                status.update(label="Transcription terminée", state="complete")
                return " ".join(texts).strip()

            status.update(label="Transcription de l'audio...")
            with open(source_path, "rb") as audio_stream:
                transcript = client.audio.transcriptions.create(
                    model="whisper-1",
                    file=audio_stream,
                    **language_args
                )
            status.update(label="Transcription terminée", state="complete")
            return transcript.text
    except IOError as e:
        st.error(f"Erreur d'entrée/sortie lors de la transcription : {e}")
        return ""
    except APIError as e:
        st.error(f"Erreur API lors de la transcription : {e}")
        return ""
|
| 180 |
+
|
| 181 |
+
# Fonction pour détecter la langue d'un texte donné
|
| 182 |
+
def detect_language(input_text: str, temperature: float = 0.01) -> str:
    """
    Detect the language of a text with the ``gpt-4o-mini`` chat model.

    Args:
        input_text (str): The text whose language should be detected.
        temperature (float): Sampling temperature for the model.
            Defaults to 0.01.

    Returns:
        str: The model's answer with surrounding whitespace removed; the
            prompt asks for an ISO-639-1 code.

    Raises:
        ValueError: If the API answer is empty, or if any unexpected error
            occurs during the call.
        requests.RequestException: If a ``requests`` communication error
            occurs during the call.
    """
    system_prompt = (
        "Agissez comme une fonction de détection de langue. "
        "Je fournirai du texte dans n'importe quelle langue, et vous détecterez sa langue. "
        "Fournissez le résultat de votre détection au format ISO-639-1. "
        "Votre réponse doit représenter l'argument `language` et ne contenir "
        "que sa valeur sous forme de chaîne. "
        "Fournir la langue d'entrée au format ISO-639-1 améliorera la précision et la latence."
    )
    try:
        response = client.chat.completions.create(
            model="gpt-4o-mini",
            temperature=temperature,
            messages=[
                {"role": "system", "content": system_prompt},
                {"role": "user", "content": input_text},
            ]
        )
    except requests.RequestException as e:
        raise requests.RequestException(
            f"Erreur de communication avec l'API : {str(e)}"
        ) from e
    except Exception as e:
        raise ValueError(
            f"Erreur inattendue lors de la détection de la langue : {str(e)}"
        ) from e

    # Validate outside the try block so the "empty answer" error is not
    # re-wrapped as an "unexpected error".
    choices = response.choices
    content = choices[0].message.content if choices else None
    detected_language = (content or "").strip()
    if not detected_language:
        raise ValueError("La réponse de l'API est vide")
    return detected_language
|
| 228 |
+
|
| 229 |
+
def get_duration_pydub(audio_file: str) -> float:
    """
    Return the duration of an audio file, measured with pydub.

    Args:
        audio_file (str): Path to the audio file.

    Returns:
        float: Duration in seconds, or 0.0 if the file is missing or cannot
            be read (the error is printed).
    """
    try:
        return AudioSegment.from_file(audio_file).duration_seconds
    except FileNotFoundError:
        print(f"Erreur : Le fichier audio '{audio_file}' n'a pas été trouvé.")
        return 0.0
    except Exception as e:
        # Deliberately best-effort: any failure is reported as a zero
        # duration instead of being raised.
        print(f"Erreur lors de la lecture du fichier audio : {str(e)}")
        return 0.0
|
| 248 |
+
|
| 249 |
+
def text_to_speech(text: str) -> Tuple[Optional[bytes], float]:
    """
    Convert text to speech with the OpenAI ``tts-1`` model.

    The voice is read from ``st.session_state.tts_voice``.

    Args:
        text (str): The text to convert to speech.

    Returns:
        Tuple[Optional[bytes], float]: The MP3 audio bytes and the audio
            duration in seconds, or ``(None, 0.0)`` if the conversion failed
            (the error is printed).
    """
    temp_path = None
    try:
        response = client.audio.speech.create(
            model="tts-1",
            voice=st.session_state.tts_voice,
            input=text
        )

        # Reserve a temporary path and close the handle before the SDK writes
        # to it, so the file is not opened twice.
        with tempfile.NamedTemporaryFile(delete=False, suffix=".mp3") as temp_audio:
            temp_path = temp_audio.name
        response.stream_to_file(temp_path)

        with open(temp_path, "rb") as audio_file:
            audio_bytes = audio_file.read()

        audio_duration = get_duration_pydub(temp_path)

        return audio_bytes, audio_duration
    except Exception as e:
        print(f"Erreur lors de la conversion texte-parole : {str(e)}")
        return None, 0.0
    finally:
        # The file was created with delete=False, so remove it explicitly.
        if temp_path is not None:
            try:
                os.unlink(temp_path)
            except OSError:
                pass
|
| 281 |
+
|
| 282 |
+
def concatenate_audio_files(audio_list: List[Tuple[bytes, float]]) -> Optional[bytes]:
    """
    Concatenate several MP3 clips, framing each one with sound effects.

    Each clip is preceded by the "begin" effect and followed by the "ending"
    effect and 1.5 seconds of silence.

    Args:
        audio_list (List[Tuple[bytes, float]]): Tuples of MP3 audio bytes and
            duration; the duration is not used here.

    Returns:
        Optional[bytes]: The concatenated audio as MP3 bytes, or None if an
            error occurred (the error is printed).
    """
    final_audio = AudioSegment.empty()

    try:
        # Sound effects are loaded from paths relative to the working directory.
        begin_sound = AudioSegment.from_mp3(
            "sound-effects/voice-message-play-begin/voice-message-play-begin-1.mp3"
        )
        end_sound = AudioSegment.from_mp3(
            "sound-effects/voice-message-play-ending/voice-message-play-ending-1.mp3"
        )

        # Pause inserted after each clip: 1500 ms = 1.5 seconds.
        silence = AudioSegment.silent(duration=1500)

        for audio_bytes, _ in audio_list:
            segment = AudioSegment.from_mp3(io.BytesIO(audio_bytes))
            final_audio += begin_sound + segment + end_sound + silence

        buffer = io.BytesIO()
        final_audio.export(buffer, format="mp3")
        return buffer.getvalue()
    except IOError as e:
        print(f"Erreur lors de la lecture ou de l'écriture des fichiers audio : {e}")
        return None
    except Exception as e:
        print(f"Une erreur inattendue s'est produite : {e}")
        return None
|
| 325 |
+
|
| 326 |
+
def process_message(
    message: str,
    operation_prompt: str = "",
    tts_enabled: bool = False
) -> Tuple[Optional[bytes], Optional[float]]:
    """
    Send a user message to the chat model, render the exchange, optionally speak it.

    The message, prefixed by ``operation_prompt``, is appended to
    ``st.session_state.messages``. The ``gpt-4o-mini`` answer is streamed into
    the page and appended to the history. When ``tts_enabled`` is true, the
    answer is converted to speech.

    NOTE(review): the nesting of the Streamlit containers and the two avatar
    emoji were reconstructed from a damaged copy of this function - confirm
    them against the rendered page.

    Args:
        message (str): The user's input message.
        operation_prompt (str, optional): Operation prefix placed before the
            message. Defaults to "".
        tts_enabled (bool, optional): Whether to synthesize the answer.
            Defaults to False.

    Returns:
        Tuple[Optional[bytes], Optional[float]]: The TTS audio and its
            duration, or ``(None, None)`` if TTS is disabled or an error
            occurred.
    """
    payload_content = f'{operation_prompt} :\n"""\n{message}\n"""'

    st.session_state.messages.append({"role": "user", "content": payload_content})
    with st.chat_message("user", avatar="👤"):
        st.markdown(message)

    with st.chat_message("assistant", avatar="👻"):
        message_placeholder = st.empty()
        full_response = ""

        with st.status("Traitement en cours...", expanded=True) as status:
            status.update(label="Génération de la réponse", state="running", expanded=True)
            try:
                stream = client.chat.completions.create(
                    model="gpt-4o-mini",
                    messages=st.session_state.messages,
                    stream=True,
                    temperature=0.1
                )
                for chunk in stream:
                    # A chunk without choices would raise IndexError below.
                    if not chunk.choices:
                        continue
                    full_response += (chunk.choices[0].delta.content or "")
                    # Trailing block character acts as a typing cursor.
                    message_placeholder.markdown(full_response + "▌")
                # Strip the triple double quotes wrapping the answer, if any.
                full_response = re.sub(r'^"{3}|"{3}$', '', full_response.strip())
                message_placeholder.markdown(full_response)
                status.update(label="Réponse générée avec succès", state="complete")
            except Exception as e:
                st.error(f"Une erreur s'est produite lors de la génération de la réponse : {e}")
                status.update(label="Erreur lors de la génération de la réponse", state="error")
                return None, None

        st.session_state.messages.append(
            {"role": "assistant", "content": full_response}
        )

        if tts_enabled:
            with st.status("Conversion texte-parole en cours...", expanded=True) as status:
                status.update(label="Conversion texte-parole", state="running", expanded=True)
                try:
                    tts_audio, tts_duration = text_to_speech(full_response)
                except Exception as e:
                    st.error(f"Une erreur s'est produite lors de la conversion texte-parole : {e}")
                    status.update(label="Erreur lors de la conversion texte-parole", state="error")
                    return None, None
                # text_to_speech signals failure with a None payload rather
                # than an exception, so check it before reporting success.
                if tts_audio is None:
                    status.update(label="Erreur lors de la conversion texte-parole", state="error")
                    return None, None
                status.update(label="Conversion texte-parole réussie", state="complete")
                return tts_audio, tts_duration
    return None, None
|
| 388 |
+
|
| 389 |
+
class GlobalSystemPrompts:
    """Class to store global system prompts."""

    @staticmethod
    def linguascribe():
        """
        Retrieve the system prompt for the Linguascribe feature.

        The prompt is read from ``linguascribe.prompt`` in the current working
        directory.

        Returns:
            str: The system prompt for Linguascribe, or an empty string if the
                file is missing or cannot be read (the error is printed).
        """
        # The file is opened here directly so that a read failure can be
        # detected; a helper that returns an error message as its result
        # would hand that message back as the system prompt.
        try:
            with open('linguascribe.prompt', 'r', encoding='utf-8') as prompt_file:
                return prompt_file.read()
        except FileNotFoundError:
            print("Le fichier 'linguascribe.prompt' n'a pas été trouvé.")
            return ""
        except (IOError, UnicodeDecodeError) as e:
            print(f"Erreur lors de la lecture du fichier 'linguascribe.prompt': {e}")
            return ""
|
| 409 |
+
|
| 410 |
+
# Function to configure the translation mode
|
| 411 |
+
def set_translation_mode(from_lang: str, dest_lang: str) -> Tuple[str, str]:
    """
    Build the prompts used in translation mode.

    Args:
        from_lang (str): The source language.
        dest_lang (str): The destination language.

    Returns:
        Tuple[str, str]: The Linguascribe system prompt and the operation
            prompt ``Translate(<from_lang> to <dest_lang>)``.
    """
    system_prompt = GlobalSystemPrompts.linguascribe()
    operation_prompt = f"Translate({from_lang} to {dest_lang})"
    return system_prompt, operation_prompt
|
| 425 |
+
|
| 426 |
+
# List of languages supported by the application
|
| 427 |
+
SUPPORTED_LANGUAGES = [
    "Afrikaans", "Arabic", "Armenian", "Azerbaijani", "Belarusian", "Bosnian",
    "Bulgarian", "Catalan", "Chinese", "Croatian", "Czech", "Danish", "Dutch",
    "English", "Estonian", "Finnish", "French", "Galician", "German", "Greek",
    "Hebrew", "Hindi", "Hungarian", "Icelandic", "Indonesian", "Italian",
    "Japanese", "Kannada", "Kazakh", "Korean", "Latvian", "Lithuanian",
    "Macedonian", "Malay", "Marathi", "Maori", "Nepali", "Norwegian", "Persian",
    "Polish", "Portuguese", "Romanian", "Russian", "Serbian", "Slovak",
    "Slovenian", "Spanish", "Swahili", "Swedish", "Tagalog", "Tamil", "Thai",
    "Turkish", "Ukrainian", "Urdu", "Vietnamese", "Welsh",
]
|
| 438 |
+
|
| 439 |
+
def _flag_emoji(country_code: str) -> str:
    """Return the flag emoji for a two-letter ISO 3166-1 country code."""
    # A flag is the pair of "regional indicator" code points matching the two
    # letters; U+1F1E6 is the indicator for "A".
    return "".join(chr(0x1F1E6 + ord(letter) - ord("A")) for letter in country_code)


# Flag emoji shown next to each supported language. The flags are built from
# ASCII country codes so the table does not depend on the file's encoding.
LANGUAGES_EMOJI = {
    language: _flag_emoji(country_code)
    for language, country_code in {
        "Afrikaans": "ZA", "Arabic": "SA", "Armenian": "AM", "Azerbaijani": "AZ", "Belarusian": "BY",
        "Bosnian": "BA", "Bulgarian": "BG", "Catalan": "ES", "Chinese": "CN", "Croatian": "HR",
        "Czech": "CZ", "Danish": "DK", "Dutch": "NL", "English": "GB", "Estonian": "EE",
        "Finnish": "FI", "French": "FR", "Galician": "ES", "German": "DE", "Greek": "GR",
        "Hebrew": "IL", "Hindi": "IN", "Hungarian": "HU", "Icelandic": "IS", "Indonesian": "ID",
        "Italian": "IT", "Japanese": "JP", "Kannada": "IN", "Kazakh": "KZ", "Korean": "KR",
        "Latvian": "LV", "Lithuanian": "LT", "Macedonian": "MK", "Malay": "MY", "Marathi": "IN",
        "Maori": "NZ", "Nepali": "NP", "Norwegian": "NO", "Persian": "IR", "Polish": "PL",
        "Portuguese": "PT", "Romanian": "RO", "Russian": "RU", "Serbian": "RS", "Slovak": "SK",
        "Slovenian": "SI", "Spanish": "ES", "Swahili": "KE", "Swedish": "SE", "Tagalog": "PH",
        "Tamil": "IN", "Thai": "TH", "Turkish": "TR", "Ukrainian": "UA", "Urdu": "PK",
        "Vietnamese": "VN",
    }.items()
}
# The flag of Wales is not a country-code pair: it is the black flag followed
# by the tag sequence "gbwls" and a cancel tag.
LANGUAGES_EMOJI["Welsh"] = (
    "\U0001F3F4\U000E0067\U000E0062\U000E0077\U000E006C\U000E0073\U000E007F"
)
|
| 453 |
|
| 454 |
+
# Language names mapped to their ISO 639-1 codes. Built once at import time
# instead of on every call.
_LANGUAGE_TO_ISO: Dict[str, str] = {
    "Afrikaans": "af", "Arabic": "ar", "Armenian": "hy", "Azerbaijani": "az",
    "Belarusian": "be", "Bosnian": "bs", "Bulgarian": "bg", "Catalan": "ca",
    "Chinese": "zh", "Croatian": "hr", "Czech": "cs", "Danish": "da",
    "Dutch": "nl", "English": "en", "Estonian": "et", "Finnish": "fi",
    "French": "fr", "Galician": "gl", "German": "de", "Greek": "el",
    "Hebrew": "he", "Hindi": "hi", "Hungarian": "hu", "Icelandic": "is",
    "Indonesian": "id", "Italian": "it", "Japanese": "ja", "Kannada": "kn",
    "Kazakh": "kk", "Korean": "ko", "Latvian": "lv", "Lithuanian": "lt",
    "Macedonian": "mk", "Malay": "ms", "Marathi": "mr", "Maori": "mi",
    "Nepali": "ne", "Norwegian": "no", "Persian": "fa", "Polish": "pl",
    "Portuguese": "pt", "Romanian": "ro", "Russian": "ru", "Serbian": "sr",
    "Slovak": "sk", "Slovenian": "sl", "Spanish": "es", "Swahili": "sw",
    "Swedish": "sv", "Tagalog": "tl", "Tamil": "ta", "Thai": "th",
    "Turkish": "tr", "Ukrainian": "uk", "Urdu": "ur", "Vietnamese": "vi",
    "Welsh": "cy",
}


def convert_language_name_to_iso6391(language_data: Union[str, Dict[str, str]]) -> str:
    """
    Convert a language name to its ISO 639-1 code.

    Args:
        language_data (Union[str, Dict[str, str]]): The language name, or a
            dictionary holding the name under the ``'language'`` key.

    Returns:
        str: The ISO 639-1 code for the language, or ``'en'`` if the name is
            not found (the unknown name is printed).
    """
    if isinstance(language_data, dict):
        language_name = language_data.get('language', '')
    else:
        language_name = language_data

    iso_code = _LANGUAGE_TO_ISO.get(language_name)
    if iso_code is None:
        print(f"Langue non trouvée : {language_name}")
        return "en"  # Fall back to English for unknown languages
    return iso_code
|
| 497 |
+
|
| 498 |
+
def on_languages_change() -> None:
    """
    Callback run when the destination language selection changes.

    Reads the selected names from ``st.session_state.language_selector`` and
    stores them in ``st.session_state.selected_languages`` as dictionaries
    with the keys ``"language"`` and ``"iso-639-1"``.
    """
    selected_language_names: List[str] = st.session_state.language_selector
    st.session_state.selected_languages = [
        {"language": lang, "iso-639-1": convert_language_name_to_iso6391(lang)}
        for lang in selected_language_names
    ]
|
| 505 |
+
|
| 506 |
+
|
| 507 |
+
def init_process_mode() -> Tuple[str, str]:
    """Build the prompts for the current processing mode.

    When ``st.session_state["process_mode"]`` is ``"translation"``, the
    prompts are produced by ``set_translation_mode`` from the detected source
    language and the current target language held in the session state.

    Returns:
        Tuple[str, str]: ``(system_prompt, operation_prompt)``; two empty
        strings for any other processing mode.
    """
    # Guard clause: only the translation mode produces prompts.
    if st.session_state["process_mode"] != "translation":
        return "", ""

    system_prompt, operation_prompt = set_translation_mode(
        from_lang=st.session_state.language_detected,
        dest_lang=st.session_state.target_language,
    )
    return system_prompt, operation_prompt
|
| 521 |
+
|
| 522 |
+
# Fonction principale de l'application
|
| 523 |
+
def _translate_to_selected_languages(text, tts_enabled, error_label):
    """Process ``text`` once per selected destination language.

    For every entry of ``st.session_state.selected_languages`` this sets
    ``st.session_state.target_language``, rebuilds the prompts through
    ``init_process_mode()`` and calls ``process_message``. A failure for one
    language is reported with ``st.error`` and does not stop the others.

    Args:
        text: Message to process (typed text or an audio transcription).
        tts_enabled: Forwarded to ``process_message`` as ``tts_enabled``.
        error_label: Text placed before `` : <error>`` in the error message.

    Returns:
        list: ``(tts_audio, tts_duration)`` tuples, one for each language for
        which ``process_message`` returned audio.
    """
    audio_list = []
    for selected_lang in st.session_state.selected_languages:
        st.session_state.target_language = selected_lang["iso-639-1"]

        # The operation prompt depends on the target language set just above.
        _, operation_prompt = init_process_mode()

        try:
            tts_audio, tts_duration = process_message(
                text,
                operation_prompt=f"{operation_prompt}",
                tts_enabled=tts_enabled,
            )
        except Exception as e:
            st.error(f"{error_label} : {str(e)}")
            continue

        if tts_audio is not None:
            audio_list.append((tts_audio, tts_duration))
    return audio_list


def _render_translated_audio(audio_list, key_prefix):
    """Concatenate the TTS clips and show a player with a download button.

    Args:
        audio_list: ``(tts_audio, tts_duration)`` tuples to concatenate.
        key_prefix: Prefix of the download button's widget key; it keeps the
            text and voice buttons distinct when both are drawn in one run.
    """
    try:
        final_audio = concatenate_audio_files(audio_list)
        if final_audio is None:
            # NOTE(review): presumably None means the helper already reported
            # its own error; confirm against concatenate_audio_files.
            return

        with st.container(border=True):
            st.audio(
                final_audio,
                format="audio/mp3",
                autoplay=st.session_state.get("autoplay_tts", True),
            )

            # Unique file name / widget key: language codes plus a timestamp.
            timestamp = time.strftime("%Y%m%d-%H%M%S")
            langues = "_".join(
                lang["iso-639-1"] for lang in st.session_state.selected_languages
            )

            st.download_button(
                label=f"📥 {get_translation('telecharger_audio')}",
                data=final_audio,
                file_name=f"reponse_audio_{langues}_{timestamp}.mp3",
                mime="audio/mp3",
                use_container_width=True,
                type="primary",
                key=f"{key_prefix}_{langues}_{timestamp}",
            )
    except Exception as e:
        st.error(f"Erreur lors de la concaténation des fichiers audio : {str(e)}")


def main():
    """Configure and run the Streamlit application.

    On each run this:
      1. seeds missing ``st.session_state`` keys with their defaults;
      2. makes sure the message history starts with a system message;
      3. handles a typed chat message, if one was submitted;
      4. handles a recorded voice message, if one is available;
      5. renders the sidebar (about box, interface language, destination
         languages, text-to-speech settings).
    """
    # Defaults for the session-state keys read further down.
    session_defaults = {
        "ui_loaded": False,
        "language_detected": None,
        "process_mode": "translation",
        "target_language": "en",
        "selected_languages": [{"language": "English", "iso-639-1": "en"}],
        "interface_language": "French",  # default interface language
    }
    for state_key, default_value in session_defaults.items():
        if state_key not in st.session_state:
            st.session_state[state_key] = default_value

    system_prompt, _ = init_process_mode()

    # Message history, always starting with a system message.
    if "messages" not in st.session_state:
        st.session_state.messages = []

    if not any(message["role"] == "system" for message in st.session_state.messages):
        st.session_state.messages.insert(0, {"role": "system", "content": system_prompt})

    # --- Text chat input ---------------------------------------------------
    if user_input := st.chat_input(get_translation("entrez_message")):
        if st.session_state.language_detected is None:
            st.session_state.language_detected = detect_language(
                input_text=user_input, temperature=0.01
            )

        audio_list = _translate_to_selected_languages(
            user_input,
            # Fallback matches the checkbox default declared in the sidebar.
            tts_enabled=st.session_state.get("enable_tts_for_input_from_text_field", True),
            error_label="Erreur lors du traitement du message",
        )
        if audio_list:
            _render_translated_audio(audio_list, key_prefix="download_button")

    # --- Voice input -------------------------------------------------------
    with st.container(border=True):
        st.write(f"🗣️ {get_translation('enregistrez_message')}")
        if audio := audiorecorder(
            start_prompt=get_translation("cliquez_enregistrer"),
            stop_prompt=get_translation("cliquez_arreter"),
            pause_prompt=get_translation("cliquez_pause"),
            show_visualizer=True,
            key="vocal_chat_input"
        ):
            if len(audio) > 0:
                try:
                    # Export the recording to a temporary WAV file and always
                    # delete it, even when export or transcription fails.
                    temp_path = None
                    try:
                        with tempfile.NamedTemporaryFile(delete=False, suffix=".wav") as temp_audio:
                            temp_path = temp_audio.name
                            audio.export(temp_path, format="wav")
                            transcription = transcribe_audio(
                                temp_audio, language=st.session_state.language_detected
                            )
                    finally:
                        if temp_path is not None and os.path.exists(temp_path):
                            os.unlink(temp_path)

                    if st.session_state.language_detected is None:
                        st.session_state.language_detected = detect_language(
                            input_text=transcription, temperature=0.01
                        )
                    # NOTE(review): shown on every recording, not only when the
                    # language was just detected; confirm the intended nesting.
                    st.write(get_translation("langue_detectee").format(st.session_state.language_detected))

                    audio_list = _translate_to_selected_languages(
                        transcription,
                        tts_enabled=st.session_state.get("enable_tts_for_input_from_audio_record", True),
                        error_label="Erreur lors du traitement du message audio",
                    )
                    if audio_list:
                        _render_translated_audio(audio_list, key_prefix="download_button_audio")
                except Exception as e:
                    st.error(f"Erreur lors du traitement de l'audio : {str(e)}")

    # --- Sidebar -----------------------------------------------------------
    with st.sidebar:
        st.logo("img/logo_2.png", icon_image="img/logo_2.png")
        st.header(get_translation("sidebar_titre"))

        with st.expander(f"{get_translation('a_propos')}",
                         expanded=False,
                         icon="ℹ️"):
            st.subheader(f"version: {__version__}")
            st.info(get_translation("info_app"))

        with st.container(border=True):
            st.subheader(get_translation("langue_interface"))
            # Interface language selector.
            st.selectbox(
                label=get_translation("choix_langue_interface"),
                options=list(translations.keys()),
                key="interface_language",
                index=(
                    list(translations.keys()).index("French")
                    if "interface_language" not in st.session_state
                    else list(translations.keys()).index(st.session_state.interface_language)
                ),
                format_func=lambda lang: f"{LANGUAGES_EMOJI.get(lang, '')} {lang}"
            )

        # NOTE(review): the two expander icons below were unreadable in the
        # reviewed copy of this file; the emoji are a best guess, confirm.
        with st.expander(f"{get_translation('selection_langue')}",
                         expanded=True,
                         icon="🌐"):
            # Destination languages (multiple choice).
            st.multiselect(
                label=get_translation("langues_destination"),
                placeholder=get_translation("placeholder_langues"),
                options=SUPPORTED_LANGUAGES,
                default=["English"],
                key="language_selector",
                max_selections=4,
                on_change=on_languages_change,
                format_func=lambda lang: f"{LANGUAGES_EMOJI.get(lang, '')} {lang}"
            )

        with st.expander(f"{get_translation('parametres_tts')}",
                         expanded=True,
                         icon="🔊"):
            st.selectbox(
                get_translation("choix_voix_tts"),
                options=["alloy", "echo", "fable", "onyx", "nova", "shimmer"],
                index=3,  # "onyx" is at index 3
                key="tts_voice"
            )
            st.checkbox(
                get_translation("activer_tts_texte"),
                key="enable_tts_for_input_from_text_field",
                value=True
            )
            st.checkbox(
                get_translation("activer_tts_audio"),
                key="enable_tts_for_input_from_audio_record",
                value=True
            )
            st.checkbox(
                get_translation("lecture_auto_tts"),
                key="autoplay_tts",
                value=True
            )
|
| 734 |
|
| 735 |
# Point d'entrée de l'application
|
| 736 |
if __name__ == "__main__":
|
core/DetectLanguage.py
DELETED
|
@@ -1,153 +0,0 @@
|
|
| 1 |
-
#coding: utf-8
|
| 2 |
-
from os import getenv
|
| 3 |
-
from typing import Optional
|
| 4 |
-
import requests
|
| 5 |
-
import streamlit as st
|
| 6 |
-
|
| 7 |
-
from openai import OpenAI
|
| 8 |
-
|
| 9 |
-
def detect_language(input_text: str,
                    temperature: Optional[float] = 0.2,
                    context_window: Optional[int] = 128,
                    model: Optional[str] = "gpt-4o-mini"
                    ) -> str:
    """
    Detect the language of a text, with a second opinion when in doubt.

    The text is sent to an OpenAI chat model that is asked for the ISO-639-1
    code of the input language. When the model answers with 'doute', or when
    the request times out, ``call_precise_language_model`` is used instead.

    This function does not raise: every failure is reported through
    Streamlit (``st.error``) and the function falls back to ``"en"``.

    Args:
        input_text (str): Text whose language must be detected.
        temperature (Optional[float]): Sampling temperature. Defaults to 0.2.
        context_window (Optional[int]): Sent to the API as ``max_tokens``.
            Defaults to 128.
        model (Optional[str]): Model name. Defaults to "gpt-4o-mini".

    Returns:
        str: The stripped model answer (expected to be an ISO-639-1 code;
        only its minimal length is checked), or "en" on any error.
    """
    # Local import: the module only imports the ``OpenAI`` class at the top.
    from openai import APIError, APITimeoutError

    if not input_text or not isinstance(input_text, str):
        st.error("Erreur : Le texte à analyser est vide ou invalide")
        return "en"  # fall back to English

    system_prompt = (
        "Agissez comme une fonction de détection de langue. "
        "Fournissez la langue d'entrée au format ISO-639-1. "
        "Si vous avez un doute ou une incertitude, indiquez 'doute'."
    )

    try:
        # The API key is mandatory.
        api_key = getenv("OPENAI_API_KEY")
        if not api_key:
            raise ValueError("Clé API OpenAI non trouvée dans les variables d'environnement")

        client = OpenAI(api_key=api_key)

        try:
            response = client.chat.completions.create(
                model=model,
                temperature=temperature,
                messages=[
                    {"role": "system", "content": system_prompt},
                    {"role": "user", "content": input_text},
                ],
                max_tokens=context_window,
                timeout=10  # seconds
            )
        except (APITimeoutError, requests.Timeout):
            # The OpenAI client reports timeouts as APITimeoutError;
            # requests.Timeout is kept for backward compatibility.
            st.warning("Délai d'attente dépassé pour la détection de langue, utilisation du modèle de secours")
            return call_precise_language_model(input_text)

        if not response or not response.choices:
            raise ValueError("Réponse invalide de l'API")

        detected_language = response.choices[0].message.content.strip()

        # An ISO-639-1 code has at least two characters.
        if not detected_language or len(detected_language) < 2:
            raise ValueError("Code de langue invalide reçu de l'API")

        # The model signalled a doubt: ask the more precise model.
        if "doute" in detected_language.lower():
            st.info("Doute sur la détection de langue, utilisation du modèle de secours")
            return call_precise_language_model(input_text)

        return detected_language

    except (APIError, requests.RequestException) as e:
        st.error(f"Erreur de communication avec l'API : {e}")
        return "en"  # fall back to English
    except ValueError as e:
        st.error(f"Erreur de configuration : {e}")
        return "en"  # fall back to English
    except Exception as e:
        st.error(f"Erreur inattendue lors de la détection de la langue : {e}")
        return "en"  # fall back to English
|
| 95 |
-
|
| 96 |
-
def call_precise_language_model(input_text: str) -> str:
    """
    Ask a second, heavier model ("gpt-4o") for the language of ``input_text``.

    Used as a fallback when the primary detection is uncertain.

    Args:
        input_text (str): Text whose language must be detected.

    Returns:
        str: The stripped model answer (expected to be an ISO-639-1 code).

    Raises:
        ValueError: If the API call fails or returns no choice; the original
            exception is chained as ``__cause__``.
    """
    precise_prompt = (
        "Agissez comme un détecteur de langue très précis. "
        "Fournissez la langue d'entrée au format ISO-639-1. "
    )

    try:
        client = OpenAI(api_key=getenv("OPENAI_API_KEY"))

        response = client.chat.completions.create(
            model="gpt-4o",  # heavier, more precise model
            temperature=0.1,
            messages=[
                {"role": "system", "content": precise_prompt},
                {"role": "user", "content": input_text},
            ],
            max_tokens=128,
            timeout=10  # seconds, same bound as the primary detection call
        )

        if not response or not response.choices:
            raise ValueError("Réponse invalide de l'API")

        return response.choices[0].message.content.strip()

    except Exception as e:
        raise ValueError(f"Erreur lors de la vérification précise de la langue : {str(e)}") from e
|
| 130 |
-
|
| 131 |
-
|
| 132 |
-
if __name__ == "__main__":
    # Manual smoke test: print the detected language for a few samples.
    sample_texts = [
        "Bonjour, comment ça va ?",
        "Hello, how are you?",
        "Hola, ¿cómo estás?",
        # Longer and more ambiguous inputs: the language must be recognised
        # even when the text mixes look-alike words or several languages.
        "Cool raoul, j'y peut rien man, c'est la vie, c'est comme ça",
        "Bordel de merde, le cousin s'est fait choper par les flics avec du shit sur lui.",
    ]
    for input_text in sample_texts:
        detected_language = detect_language(input_text)
        print(f"La langue détectée est : {detected_language}")
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
core/__init__.py
DELETED
|
File without changes
|
core/audio_files.py
DELETED
|
@@ -1,142 +0,0 @@
|
|
| 1 |
-
#coding: utf-8
|
| 2 |
-
from pydub import AudioSegment
|
| 3 |
-
#from openai import OpenAI
|
| 4 |
-
#from io import BytesIO
|
| 5 |
-
#from typing import Any
|
| 6 |
-
#from typing import Dict
|
| 7 |
-
#from typing import IO
|
| 8 |
-
from typing import List
|
| 9 |
-
from typing import Optional
|
| 10 |
-
from typing import Tuple
|
| 11 |
-
from typing import Union
|
| 12 |
-
import base64
|
| 13 |
-
import io
|
| 14 |
-
import tempfile
|
| 15 |
-
import os
|
| 16 |
-
import streamlit as st
|
| 17 |
-
|
| 18 |
-
def concatenate_audio_files(audio_list: List[Tuple[Union[bytes, str], float]]) -> Optional[bytes]:
    """
    Concatenate a list of MP3 clips, framing each one with sound effects.

    Each clip is appended as: begin sound + clip + end sound + 1.5 s of
    silence. Clips that cannot be decoded are skipped with a warning.

    This function does not raise: errors are reported through Streamlit and
    ``None`` is returned.

    Args:
        audio_list (List[Tuple[Union[bytes, str], float]]): Tuples of audio
            data (raw bytes, or a base64 string) and a duration. The duration
            is not used here.

    Returns:
        Optional[bytes]: The concatenated audio exported as MP3, or None when
        the list is empty, no clip could be processed, or an error occurred.
    """
    if not audio_list:
        st.error("Erreur : Aucun fichier audio à concaténer")
        return None

    final_audio = AudioSegment.empty()
    temp_files = []  # paths of the temporary files to delete at the end

    try:
        # Sound effects are optional: carry on without them if unavailable.
        try:
            begin_sound = AudioSegment.from_mp3(
                "sound-effects/voice-message-play-begin/voice-message-play-begin-1.mp3"
            )
            end_sound = AudioSegment.from_mp3(
                "sound-effects/voice-message-play-ending/voice-message-play-ending-1.mp3"
            )
        except IOError:
            st.warning("Impossible de charger les effets sonores, continuation sans effets")
            begin_sound = end_sound = AudioSegment.empty()

        # 1.5 seconds of silence between clips.
        silence = AudioSegment.silent(duration=1500)

        for audio_data, _ in audio_list:
            try:
                # A string is taken to be base64-encoded audio.
                if isinstance(audio_data, str):
                    try:
                        audio_bytes = base64.b64decode(audio_data)
                    except Exception as e:
                        st.error(f"Erreur de décodage base64 : {e}")
                        continue
                else:
                    audio_bytes = audio_data

                # Write the clip to a temporary file; the context manager
                # closes the handle even if the write fails.
                with tempfile.NamedTemporaryFile(delete=False, suffix='.mp3') as temp_file:
                    temp_files.append(temp_file.name)
                    temp_file.write(audio_bytes)

                segment = AudioSegment.from_mp3(temp_file.name)

                final_audio += begin_sound + segment + end_sound + silence

            except Exception as e:
                st.warning(f"Erreur lors du traitement d'un segment audio : {e}")
                continue

        if len(final_audio) == 0:
            raise ValueError("Aucun segment audio n'a pu être traité correctement")

        # Export the final segment to bytes.
        buffer = io.BytesIO()
        final_audio.export(buffer, format="mp3")
        return buffer.getvalue()

    except ValueError as e:
        st.error(f"Erreur de validation : {e}")
        return None
    except IOError as e:
        st.error(f"Erreur lors de la lecture ou de l'écriture des fichiers audio : {e}")
        return None
    except Exception as e:
        st.error(f"Une erreur inattendue s'est produite : {e}")
        return None
    finally:
        # Best-effort cleanup: a file that cannot be removed is ignored.
        for temp_path in temp_files:
            try:
                os.remove(temp_path)
            except OSError:
                pass
|
| 108 |
-
|
| 109 |
-
|
| 110 |
-
def split_audio(audio_file, max_size_mb: int = 25) -> List[bytes]:
    """
    Split an audio file into segments bounded by a maximum size.

    The segment length is the duration whose *uncompressed* size
    (``frame_rate * sample_width * channels`` bytes per second) equals
    ``max_size_mb``. Each segment is then exported as MP3.

    Args:
        audio_file: Audio file object opened in binary mode (must support
            ``seek``).
        max_size_mb (int): Maximum size of each segment, in MiB.

    Returns:
        List[bytes]: The MP3-encoded segments, or an empty list if any error
        occurs (the error is printed).
    """
    try:
        audio_file.seek(0)
        audio = AudioSegment.from_file(audio_file)
        duration_ms = len(audio)

        bytes_per_second = audio.frame_rate * audio.sample_width * audio.channels
        max_bytes = max_size_mb * 1024 * 1024
        # bytes / (bytes per second) is seconds, hence the factor 1000 to get
        # milliseconds. At least 1 ms so that the range step is never zero.
        segment_duration_ms = max(1, int(max_bytes * 1000 / bytes_per_second))

        segments = []
        for start in range(0, duration_ms, segment_duration_ms):
            end = min(start + segment_duration_ms, duration_ms)
            segment = audio[start:end]

            with io.BytesIO() as buffer:
                segment.export(buffer, format="mp3")
                segments.append(buffer.getvalue())

        return segments
    except Exception as e:
        print(f"Une erreur s'est produite lors de la division de l'audio : {e}")
        return []
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
core/audio_isolation.py
DELETED
|
@@ -1,66 +0,0 @@
|
|
| 1 |
-
#!/usr/bin/env python3
|
| 2 |
-
#coding: utf-8
|
| 3 |
-
|
| 4 |
-
# FEATURE:
|
| 5 |
-
# Removes background noise from audio
|
| 6 |
-
# REQUIREMENT:
|
| 7 |
-
# That use the ELEVENLABS API
|
| 8 |
-
|
| 9 |
-
from typing import Optional
|
| 10 |
-
from typing import Union
|
| 11 |
-
from typing import IO
|
| 12 |
-
from typing import List
|
| 13 |
-
from typing import Dict
|
| 14 |
-
from typing import Any
|
| 15 |
-
import requests
|
| 16 |
-
import json
|
| 17 |
-
import os
|
| 18 |
-
import tempfile
|
| 19 |
-
from io import BytesIO
|
| 20 |
-
from pydub import AudioSegment
|
| 21 |
-
|
| 22 |
-
from dotenv import load_dotenv
|
| 23 |
-
from elevenlabs import ElevenLabs
|
| 24 |
-
|
| 25 |
-
def isolate_audio_elevenlabs(fichier_audio: str):
    """
    Isolate the voice of an audio file through the ElevenLabs API.

    This function used to be named ``isolate_audio`` as well and was
    overwritten by the filter-based function defined below; it is renamed so
    that it can actually be called.

    The API key is read from the ``ELEVENLABS_API_KEY`` environment variable
    after loading the ``.env`` file.

    Args:
        fichier_audio (str): Path of the audio file to process.

    Returns:
        BytesIO: Buffer holding the returned audio, positioned at its start.

    Example:
        buffer = isolate_audio_elevenlabs("audio.mp3")
        with open("audio_isole.mp3", "wb") as output_file:
            output_file.write(buffer.read())
    """
    load_dotenv()
    client = ElevenLabs(api_key=os.getenv("ELEVENLABS_API_KEY"))
    buffer = BytesIO()

    with open(fichier_audio, 'rb') as audio_file:
        isolated_audio_iterator = client.audio_isolation.audio_isolation(audio=audio_file)

        # Consume the chunks while the input file is still open.
        # NOTE(review): presumably the iterator is lazy and reads the file
        # during iteration; confirm against the ElevenLabs SDK.
        for chunk in isolated_audio_iterator:
            buffer.write(chunk)

    buffer.seek(0)
    return buffer


def isolate_audio(audio: AudioSegment) -> AudioSegment:
    """
    Isolate the voice in an audio clip with a simple band-pass filter.

    This is a simplified approach: only the 80 Hz - 3000 Hz band is kept.

    Args:
        audio (AudioSegment): The original audio.

    Returns:
        AudioSegment: The filtered audio.
    """
    # Cut-off frequencies used for the human voice.
    low_freq = 80     # lower bound in Hz
    high_freq = 3000  # upper bound in Hz

    # Band-pass: low-pass at the upper bound, then high-pass at the lower one.
    isolated_audio = audio.low_pass_filter(high_freq).high_pass_filter(low_freq)

    return isolated_audio
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
core/clients_connection_check.py
DELETED
|
@@ -1,11 +0,0 @@
|
|
| 1 |
-
#coding: utf-8
|
| 2 |
-
|
| 3 |
-
def check_openai_api_key(api_key):
    """Tell whether ``api_key`` is accepted by the OpenAI API.

    The check lists the available models with a client built from the key.

    Args:
        api_key: The OpenAI API key to check.

    Returns:
        bool: False when the API raises ``openai.AuthenticationError``, True
        when the models can be listed. Any other exception propagates.
    """
    import openai

    probe_client = openai.OpenAI(api_key=api_key)
    try:
        probe_client.models.list()
        return True
    except openai.AuthenticationError:
        return False
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
core/converter.py
DELETED
|
@@ -1,99 +0,0 @@
|
|
| 1 |
-
#coding: utf-8
|
| 2 |
-
|
| 3 |
-
from typing import Optional
|
| 4 |
-
from typing import Union
|
| 5 |
-
from typing import Dict
|
| 6 |
-
from typing import Any
|
| 7 |
-
from typing import List
|
| 8 |
-
|
| 9 |
-
# English language names mapped to their ISO 639-1 codes.
_LANGUAGE_TO_ISO: Dict[str, str] = {
    "Afrikaans": "af", "Arabic": "ar", "Armenian": "hy", "Azerbaijani": "az",
    "Belarusian": "be", "Bosnian": "bs", "Bulgarian": "bg", "Catalan": "ca",
    "Chinese": "zh", "Croatian": "hr", "Czech": "cs", "Danish": "da",
    "Dutch": "nl", "English": "en", "Estonian": "et", "Finnish": "fi",
    "French": "fr", "Galician": "gl", "German": "de", "Greek": "el",
    "Hebrew": "he", "Hindi": "hi", "Hungarian": "hu", "Icelandic": "is",
    "Indonesian": "id", "Italian": "it", "Japanese": "ja", "Kannada": "kn",
    "Kazakh": "kk", "Korean": "ko", "Latvian": "lv", "Lithuanian": "lt",
    "Macedonian": "mk", "Malay": "ms", "Marathi": "mr", "Maori": "mi",
    "Nepali": "ne", "Norwegian": "no", "Persian": "fa", "Polish": "pl",
    "Portuguese": "pt", "Romanian": "ro", "Russian": "ru", "Serbian": "sr",
    "Slovak": "sk", "Slovenian": "sl", "Spanish": "es", "Swahili": "sw",
    "Swedish": "sv", "Tagalog": "tl", "Tamil": "ta", "Thai": "th",
    "Turkish": "tr", "Ukrainian": "uk", "Urdu": "ur", "Vietnamese": "vi",
    "Welsh": "cy",
}
# Case-insensitive view of the table, and the set of known codes.
_LANGUAGE_TO_ISO_FOLDED: Dict[str, str] = {
    name.casefold(): code for name, code in _LANGUAGE_TO_ISO.items()
}
_KNOWN_ISO_CODES = frozenset(_LANGUAGE_TO_ISO.values())


def convert_language_name_to_iso6391(language_data: Union[str, Dict[str, str]]) -> str:
    """
    Convert a language name to its ISO 639-1 code.

    Matching ignores case and surrounding whitespace. A value that already is
    a known ISO 639-1 code is returned (lower-cased) instead of being
    rejected.

    Args:
        language_data (Union[str, Dict[str, str]]): The language name, or a
            dictionary holding it under the 'language' key.

    Returns:
        str: The ISO 639-1 code of the language, or 'en' when the language is
        unknown or the input is not a string (a message is printed).
    """
    default_ = "en"

    # Accept either the bare name or a {'language': name} record.
    if isinstance(language_data, dict):
        language_name = language_data.get('language', '')
    else:
        language_name = language_data

    if isinstance(language_name, str):
        folded = language_name.strip().casefold()
        code = _LANGUAGE_TO_ISO_FOLDED.get(folded)
        if code is not None:
            return code
        # The caller may already have passed an ISO 639-1 code.
        if folded in _KNOWN_ISO_CODES:
            return folded

    print(f"Langue non trouvée : {language_name}")
    return default_
|
| 56 |
-
|
| 57 |
-
|
| 58 |
-
|
| 59 |
-
def convert_iso6391_to_language_name(language_code: str,
                                     filter_mode=True) -> str:
    """
    Convert an ISO 639-1 code into its English language name.

    The lookup tolerates surrounding whitespace, letter case and a region
    subtag ("pt-BR", "zh_CN"). A value that is already one of the known
    language names is returned as that name.

    Args:
        language_code (str): ISO 639-1 code (e.g. "fr") or a language name.
        filter_mode: Accepted for backward compatibility; this function
            does not read it.

    Returns:
        str: The matching language name, or 'English' when nothing matches.
    """
    # Mapping from ISO 639-1 codes to language names
    iso_to_language: Dict[str, str] = {
        "af": "Afrikaans", "ar": "Arabic", "hy": "Armenian", "az": "Azerbaijani",
        "be": "Belarusian", "bs": "Bosnian", "bg": "Bulgarian", "ca": "Catalan",
        "zh": "Chinese", "hr": "Croatian", "cs": "Czech", "da": "Danish",
        "nl": "Dutch", "en": "English", "et": "Estonian", "fi": "Finnish",
        "fr": "French", "gl": "Galician", "de": "German", "el": "Greek",
        "he": "Hebrew", "hi": "Hindi", "hu": "Hungarian", "is": "Icelandic",
        "id": "Indonesian", "it": "Italian", "ja": "Japanese", "kn": "Kannada",
        "kk": "Kazakh", "ko": "Korean", "lv": "Latvian", "lt": "Lithuanian",
        "mk": "Macedonian", "ms": "Malay", "mr": "Marathi", "mi": "Maori",
        "ne": "Nepali", "no": "Norwegian", "fa": "Persian", "pl": "Polish",
        "pt": "Portuguese", "ro": "Romanian", "ru": "Russian", "sr": "Serbian",
        "sk": "Slovak", "sl": "Slovenian", "es": "Spanish", "sw": "Swahili",
        "sv": "Swedish", "tl": "Tagalog", "ta": "Tamil", "th": "Thai",
        "tr": "Turkish", "uk": "Ukrainian", "ur": "Urdu", "vi": "Vietnamese",
        "cy": "Welsh"
    }
    default_ = "English"

    # Fast path: an exact, already well-formed code.
    if isinstance(language_code, str) and language_code in iso_to_language:
        return iso_to_language[language_code]

    candidate = "" if language_code is None else f"{language_code}".strip()
    lowered = candidate.lower()

    if lowered in iso_to_language:
        return iso_to_language[lowered]

    # The caller may already hold a language name ("French", "french").
    for name in iso_to_language.values():
        if name.lower() == lowered:
            return name

    # Locale-style tags: only the primary language subtag is looked up.
    primary = lowered.replace("_", "-").split("-", 1)[0]
    if primary in iso_to_language:
        return iso_to_language[primary]

    print(f"Code de langue non trouvé : {language_code}")
    return default_
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
core/core.py
DELETED
|
@@ -1,108 +0,0 @@
|
|
| 1 |
-
#coding: utf-8
|
| 2 |
-
|
| 3 |
-
import re
|
| 4 |
-
from os import getenv
|
| 5 |
-
#from typing import Any
|
| 6 |
-
#from typing import Dict
|
| 7 |
-
#from typing import IO
|
| 8 |
-
from typing import List
|
| 9 |
-
from typing import Optional
|
| 10 |
-
from typing import Tuple
|
| 11 |
-
from typing import Union
|
| 12 |
-
from core.files import load_ui_language
|
| 13 |
-
from core.files import read_file
|
| 14 |
-
from core.demorrha import DemorrhaAssistant
|
| 15 |
-
from var_app import GlobalSystemPrompts
|
| 16 |
-
import streamlit as st
|
| 17 |
-
from openai import OpenAI
|
| 18 |
-
|
| 19 |
-
from dotenv import load_dotenv
|
| 20 |
-
# Charger les variables d'environnement depuis le fichier .env
|
| 21 |
-
load_dotenv()
|
| 22 |
-
|
| 23 |
-
class uiLang:
    """Namespace exposing the UI translation table loaded at import time."""

    # Result of load_ui_language(), evaluated once when the class body runs.
    translations = load_ui_language()


# Module-level alias of the table, read by get_translation().
translations = uiLang.translations

# OpenAI client built from the OPENAI_API_KEY environment variable.
client = OpenAI(api_key=getenv("OPENAI_API_KEY"))
|
| 30 |
-
|
| 31 |
-
def get_translation(key: str) -> str:
    """
    Return the UI string for *key* in the currently selected interface language.

    The language is read from ``st.session_state['interface_language']``
    (default 'English'). The key itself is returned when the translation
    table, the language entry or the key is missing.

    Args:
        key (str): Identifier of the UI string.

    Returns:
        str: The translated string, or *key* as a fallback.
    """
    lang = st.session_state.get('interface_language', 'English')
    # The module-level table is None when the translation file failed to load.
    table = translations if isinstance(translations, dict) else {}
    entries = table.get(lang)
    if not isinstance(entries, dict):
        return key
    return entries.get(key, key)
|
| 37 |
-
|
| 38 |
-
|
| 39 |
-
def process_message(
    message: str,
    operation_prompt_: Optional[str] = "",
    system_prompt_: Optional[str] = ""
):
    """
    Send a user message to the "Demorrha" assistant and stream its answer.

    This is a generator: it yields each partial response produced by
    ``DemorrhaAssistant.use_assistant()`` and finally the cleaned-up full
    response. The latest value is mirrored in
    ``st.session_state.full_response`` and ``st.session_state.messages``
    is reset to an empty list once the stream is finished.

    Args:
        message (str): The user's input message.
        operation_prompt_ (str, optional): Instruction prefixed to the
            message in the payload. Defaults to "".
        system_prompt_ (str, optional): Accepted for backward compatibility;
            this function does not read it.

    Yields:
        str: The successive partial responses, then the final response with
        the surrounding triple double quotes removed.
    """
    demorrha = DemorrhaAssistant()
    demorrha.load_assistant("Demorrha")
    # A None prompt would otherwise be formatted as the literal text "None".
    demorrha.set_payload(f"{message}", f"{operation_prompt_ or ''}")

    st.session_state["full_response"] = ""
    try:
        for response in demorrha.use_assistant():
            st.session_state.full_response = response
            yield st.session_state.full_response

        # Drop a leading/trailing triple double quote, then the whitespace
        # that was sitting between those quotes and the text.
        st.session_state.full_response = re.sub(
            r'^"{3}|"{3}$', '', st.session_state.full_response.strip()
        ).strip()
        st.session_state.messages = []
        yield st.session_state.full_response
    except Exception as e:
        st.error(f"Une erreur s'est produite lors de la génération de la réponse : {e}")
        # Kept as in the original; in a generator this only sets StopIteration.value.
        return ""
|
| 71 |
-
|
| 72 |
-
# Function to configure the translation mode
|
| 73 |
-
def set_translation_mode(from_lang: str, dest_lang: str) -> Tuple[str, str]:
    """
    Build the prompt pair used for a translation request.

    Args:
        from_lang (str): Source language.
        dest_lang (str): Destination language.

    Returns:
        Tuple[str, str]: The system prompt returned by
        ``GlobalSystemPrompts.linguascribe()`` and the operation prompt
        ``Translate(<from_lang> to <dest_lang>)``.
    """
    return (
        GlobalSystemPrompts.linguascribe(),
        f"Translate({from_lang} to {dest_lang})",
    )
|
| 87 |
-
|
| 88 |
-
def init_process_mode(
    from_lang: str,
    to_lang: str,
    process_mode: Optional[Union[str, List[str]]] = "translator"
) -> Tuple[str, str]:
    """
    Return the prompts for the requested processing mode.

    Args:
        from_lang (str): Source language
            (e.g. st.session_state.language_detected).
        to_lang (str): Destination language
            (e.g. st.session_state.target_language).
        process_mode: A mode name or a list of mode names. Only
            "translator" produces prompts; None or an empty value is
            treated as "no mode".

    Returns:
        Tuple[str, str]: (system prompt, operation prompt) for translation,
        or two empty strings when "translator" is not requested.
    """
    # None is allowed by the signature but cannot be searched with `in`.
    if not process_mode or "translator" not in process_mode:
        return "", ""

    return set_translation_mode(
        from_lang=f"{from_lang}",
        dest_lang=f"{to_lang}"
    )
|
| 108 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
core/demorrha.py
DELETED
|
@@ -1,216 +0,0 @@
|
|
| 1 |
-
#coding: utf-8
|
| 2 |
-
|
| 3 |
-
import re
|
| 4 |
-
from os import getenv
|
| 5 |
-
from typing import Any
|
| 6 |
-
from typing import Dict
|
| 7 |
-
from typing import IO
|
| 8 |
-
from typing import List
|
| 9 |
-
from typing import Optional
|
| 10 |
-
from typing import Tuple
|
| 11 |
-
from typing import Union
|
| 12 |
-
from var_app import GlobalSystemPrompts
|
| 13 |
-
import streamlit as st
|
| 14 |
-
from openai import OpenAI
|
| 15 |
-
|
| 16 |
-
from dotenv import load_dotenv
|
| 17 |
-
# Charger les variables d'environnement depuis le fichier .env
|
| 18 |
-
|
| 19 |
-
class DemorrhaAssistant(object):
    """
    Helper around the OpenAI beta Assistants / vector-store endpoints.

    Most mutators return ``self`` so that calls can be chained, e.g.
    ``DemorrhaAssistant().load_vector_store().get_vector_store()``.
    """

    def __init__(self):
        # Load environment variables from the .env file before reading the key.
        load_dotenv()
        self.client = self.initialize_client()
        self.vector_store = None
        self.list_vector_store_ids = []
        # Defined up front so the getters work before load_assistant() /
        # set_payload() / set_system_prompt() have been called.
        self.assistant = None
        self.system_prompt = ""
        self.payload_content = ""

    def initialize_client(self):
        """Create the OpenAI client from the OPENAI_API_KEY environment variable."""
        api_key = getenv("OPENAI_API_KEY")
        return OpenAI(api_key=api_key)

    @staticmethod
    def _find_id_by_name(fetch_page, wanted_name):
        """Walk a cursor-paginated listing; return the id of the first item named *wanted_name*, else None."""
        last_id = None
        while True:
            page = fetch_page(last_id)
            if len(page.data) < 1:
                return None
            for item in page.data:
                if item.name == wanted_name:
                    return item.id
            # The id of the last item is the cursor for the next page.
            last_id = page.data[-1].id

    def search_assistant(self, assistant_name="Demorrha"):
        """Return the id of the assistant called *assistant_name*, or None if there is none."""
        return self._find_id_by_name(
            lambda cursor: self.client.beta.assistants.list(
                order="desc",
                limit=20,
                after=cursor
            ),
            assistant_name
        )

    def search_vector_store(self, vector_store_name="Demorrha_Style"):
        """Return the id of the vector store called *vector_store_name*, or None if there is none."""
        return self._find_id_by_name(
            lambda cursor: self.client.beta.vector_stores.list(
                order="desc",
                limit=20,
                after=cursor
            ),
            f"{vector_store_name}"
        )

    def load_vector_store(self, vector_store_name="Demorrha_Style"):
        """Retrieve the named vector store, creating it when it does not exist. Returns self."""
        vector_store_id = self.search_vector_store(vector_store_name)
        if vector_store_id is None:
            self.vector_store = self.client.beta.vector_stores.create(name=f"{vector_store_name}")
        else:
            self.vector_store = self.client.beta.vector_stores.retrieve(vector_store_id)
        return self

    def get_vector_store(self):
        """Return the vector store set by load_vector_store() (None before that)."""
        return self.vector_store

    def upload_file(self,
                    file_path,
                    purpose="assistants"):
        """Upload the file at *file_path* and return the object returned by ``files.create``."""
        # The handle is closed as soon as the upload call returns.
        with open(file_path, "rb") as handle:
            return self.client.files.create(
                file=handle,
                purpose=purpose
            )

    def list_files_in_vector_store(self, vector_store_id):
        """Return a list with every file entry of the given vector store, following pagination."""
        collected = []
        last_id = None
        while True:
            page = self.client.beta.vector_stores.files.list(
                vector_store_id=vector_store_id,
                limit=20,
                after=last_id
            )
            if len(page.data) < 1:
                break
            collected.extend(page.data)
            last_id = page.data[-1].id
        return collected

    def attach_file_to_vectore_store(self,
                                     vector_store_id,
                                     file_id):
        """Attach an already uploaded file to a vector store and return the API result."""
        return self.client.beta.vector_stores.files.create(
            vector_store_id=vector_store_id,
            file_id=file_id
        )

    def load_assistant(self, assistant_name="Demorrha"):
        """
        Retrieve the assistant called *assistant_name*, creating it if needed.

        The system prompt is set from ``GlobalSystemPrompts.linguascribe()``
        and is used as the instructions of a newly created assistant.
        Returns self.
        """
        self.set_system_prompt(GlobalSystemPrompts.linguascribe())
        assistant_id = self.search_assistant(assistant_name)
        if assistant_id is None:
            self.assistant = self.client.beta.assistants.create(
                model="gpt-4o-mini",
                # Created under the searched name so the next search finds it.
                name=f"{assistant_name}",
                description="Traite les messages des utilisateurs et génère une traduction.",
                instructions=f"{self.system_prompt}",
                temperature=0.1,
                tools=[{"type": "file_search"}]
            )
        else:
            self.assistant = self.client.beta.assistants.retrieve(assistant_id)
        return self

    def get_assistant(self):
        """Return the loaded assistant, or None when none has been loaded."""
        return self.assistant

    def get_assistant_id(self):
        """Return the id of the loaded assistant, or None when none has been loaded."""
        return None if self.assistant is None else self.assistant.id

    def add_file_to_vector_store(self, file_paths):
        """Upload the files at *file_paths* to the current vector store and return the file batch."""
        file_streams = []
        try:
            for path in file_paths:
                file_streams.append(open(path, "rb"))
            return self.client.beta.vector_stores.file_batches.upload_and_poll(
                vector_store_id=self.vector_store.id, files=file_streams
            )
        finally:
            # Close every handle that was opened, even if the upload failed.
            for stream in file_streams:
                stream.close()

    def set_payload(self,
                    content_message: str,
                    operation_prompt: Optional[str] = ""):
        """Store the user payload: the operation prompt followed by the message fenced in triple quotes. Returns self."""
        self.payload_content = f'{operation_prompt} :\n"""\n{content_message}\n"""'
        return self

    def set_system_prompt(self,
                          system_prompt: Optional[str] = ""):
        """Store the system prompt used when an assistant is created. Returns self."""
        self.system_prompt = system_prompt
        return self

    def add_vector_store_to_ressource(self, vector_store_id):
        """Queue a vector store id for update_vector_store_ids(). Returns self."""
        self.list_vector_store_ids.append(vector_store_id)
        return self

    def get_vector_store_ids(self):
        """Return the list of queued vector store ids."""
        return self.list_vector_store_ids

    def empty_vector_store_ids(self):
        """Clear the queued vector store ids. Returns self."""
        self.list_vector_store_ids = []
        return self

    def update_vector_store_ids(self):
        """Set the queued vector stores as the assistant's file_search resources. Returns self."""
        self.assistant = self.client.beta.assistants.update(
            assistant_id=self.assistant.id,
            tool_resources={"file_search": {"vector_store_ids": self.list_vector_store_ids}},
        )
        return self

    def use_assistant(self):
        """
        Run the assistant on the stored payload and stream its answer.

        Yields:
            str: The text accumulated so far followed by a cursor character
            for each delta event, then the full message text when the
            message is completed.
        """
        full_response = ""
        with self.client.beta.threads.create_and_run(
            assistant_id=self.assistant.id,
            thread={
                "messages": [
                    {"role": "user", "content": self.payload_content}
                ]
            },
            stream=True
        ) as stream:
            for event in stream:
                if event.event == "thread.message.delta":
                    full_response += event.data.delta.content[0].text.value
                    # NOTE(review): cursor glyph restored from a mis-decoded
                    # character in this copy; confirm against the original.
                    yield full_response + "▌"
                elif event.event == "thread.message.completed":
                    yield event.data.content[0].text.value
|
| 190 |
-
|
| 191 |
-
# Script entry point: loads the vector store and the assistant, then prints one streamed sample translation.
# NOTE(review): indentation is lost in this copy, so which `if` the `else` below belongs to must be confirmed against the original file.
if __name__ == "__main__":
|
| 192 |
-
demorrha = DemorrhaAssistant()
|
| 193 |
-
vector_store = demorrha.load_vector_store().get_vector_store()
|
| 194 |
-
demorrha.empty_vector_store_ids()
|
| 195 |
-
print(vector_store)
|
| 196 |
-
|
| 197 |
-
if vector_store.status == "completed":
|
| 198 |
-
if vector_store.file_counts.total > 0:
|
| 199 |
-
if vector_store.file_counts.completed == vector_store.file_counts.total:
|
| 200 |
-
print("Le chargement du vecteur est terminรฉ.")
|
| 201 |
-
demorrha.add_vector_store_to_ressource(vector_store.id)
|
| 202 |
-
else:
|
| 203 |
-
file_paths = ["style.txt"]
|
| 204 |
-
file_batch = demorrha.add_file_to_vector_store(file_paths)
|
| 205 |
-
print("Fichier ajoutรฉ au vector_store:", file_batch)
|
| 206 |
-
|
| 207 |
-
# Example usage of the assistant
|
| 208 |
-
demorrha.load_assistant("Demorrha")
|
| 209 |
-
demorrha.update_vector_store_ids()
|
| 210 |
-
demorrha.set_payload("Tu dois faire preuve de courage pour trouver la force.", "Traduit le texte en Anglais. Et applique les instructions du fichier \'style.txt\'")
|
| 211 |
-
response_generator = demorrha.use_assistant()
|
| 212 |
-
final_response = ""
|
| 213 |
-
for response in response_generator:
|
| 214 |
-
print(response, end="\r")
|
| 215 |
-
final_response = response
|
| 216 |
-
print(f"\nRรฉponse finale de l'assistant: {final_response}")
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
core/files.py
DELETED
|
@@ -1,53 +0,0 @@
|
|
| 1 |
-
import json
|
| 2 |
-
from typing import Any
|
| 3 |
-
from typing import Dict
|
| 4 |
-
from typing import IO
|
| 5 |
-
from typing import List
|
| 6 |
-
from typing import Optional
|
| 7 |
-
from typing import Tuple
|
| 8 |
-
from typing import Union
|
| 9 |
-
|
| 10 |
-
def load_ui_language(file_path: Optional[str] = "ui_lang_support.json") -> Optional[Dict[str, Any]]:
    """
    Load the user-interface translations from a JSON file.

    Args:
        file_path (Optional[str]): Path of the JSON file holding the
            translations. None falls back to "ui_lang_support.json".

    Returns:
        Optional[Dict[str, Any]]: The parsed JSON content, or None when the
        file is missing, unreadable or not valid UTF-8 JSON (a message is
        printed in that case).
    """
    if file_path is None:
        # The parameter is Optional; open(None) would raise TypeError.
        file_path = "ui_lang_support.json"

    try:
        with open(file_path, 'r', encoding='utf-8') as file:
            return json.load(file)
    except FileNotFoundError:
        print(f"File Not Found: {file_path}")
        return None
    except (json.JSONDecodeError, UnicodeDecodeError):
        # Both mean the content cannot be decoded as UTF-8 JSON.
        print(f"JSON decoding error : {file_path}")
        return None
    except IOError as e:
        print(f"I/O Error : {e}")
        return None
|
| 32 |
-
|
| 33 |
-
|
| 34 |
-
# usage e.g.: read a plaintext prompt file
|
| 35 |
-
def read_file(file_name: str) -> str:
    """
    Read a UTF-8 text file and return its content.

    Args:
        file_name (str): Name of the file to read.

    Returns:
        str: The file content, or an error message when the file is
        missing or cannot be read.
    """
    try:
        with open(file_name, mode='r', encoding='utf-8') as source:
            return source.read()
    except FileNotFoundError:
        failure = f"File Not Found : {file_name}"
    except IOError as io_error:
        failure = f"I/O Error : {io_error}"
    return failure
|
| 53 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
core/moderation.py
DELETED
|
@@ -1,60 +0,0 @@
|
|
| 1 |
-
#coding: utf-8
|
| 2 |
-
|
| 3 |
-
from typing import Any
|
| 4 |
-
from typing import Dict
|
| 5 |
-
#from typing import List
|
| 6 |
-
#from typing import Tuple
|
| 7 |
-
|
| 8 |
-
from typing import Optional
|
| 9 |
-
#from typing import Union
|
| 10 |
-
from os import getenv
|
| 11 |
-
import json
|
| 12 |
-
|
| 13 |
-
from openai import OpenAI
|
| 14 |
-
from dotenv import load_dotenv
|
| 15 |
-
|
| 16 |
-
def moderate_text(input_text: str,
                  model: Optional[str] = "omni-moderation-latest") -> Optional[Any]:
    """
    Submit a text to the OpenAI moderation endpoint.

    Args:
        input_text (str): Text to moderate.
        model (Optional[str]): Moderation model; valid values are
            "omni-moderation-latest" and "text-moderation-latest".
            None falls back to "omni-moderation-latest".

    Returns:
        The response object returned by ``client.moderations.create``,
        or None when the call raised (the error is printed).
    """
    # Load environment variables from the .env file
    load_dotenv()

    # A None model would otherwise be sent as the literal string "None".
    chosen_model = model or "omni-moderation-latest"
    try:
        client = OpenAI(api_key=getenv("OPENAI_API_KEY"))
        return client.moderations.create(
            model=f"{chosen_model}",
            input=f"{input_text}"
        )
    except Exception as e:
        print(f"An error occurred: {e}")
        return None
|
| 33 |
-
|
| 34 |
-
def api_moderation_openai_text(text_to_moderate: str) -> Dict[str, Any]:
    """
    Moderate a text and summarise the outcome as a plain dictionary.

    Args:
        text_to_moderate (str): Text to moderate.

    Returns:
        Dict[str, Any]: ``{"flagged": bool, "results": [...]}`` where
        "flagged" is True if any result is flagged and each result carries
        "categories", "category_scores" and "category_applied_input_types";
        or ``{"error": ...}`` when the moderation call failed.
    """
    response = moderate_text(text_to_moderate)
    if not response:
        return {"error": "La modération a échoué"}

    moderation_result = json.loads(response.to_json())

    result_dict = {
        "flagged": False,
        "results": []
    }

    for result in moderation_result["results"]:
        result_dict["flagged"] = result_dict["flagged"] or result["flagged"]
        result_dict["results"].append({
            "categories": result["categories"],
            "category_scores": result["category_scores"],
            # Read defensively so a response without this field does not
            # raise KeyError — presumably model-dependent; verify.
            "category_applied_input_types": result.get("category_applied_input_types", {})
        })

    return result_dict
|
| 55 |
-
|
| 56 |
-
|
| 57 |
-
#if __name__=='__main__':
|
| 58 |
-
# text_to_moderate = "Ceci est un exemple de texte ร modรฉrer."
|
| 59 |
-
# result = api_moderation_openai_text(text_to_moderate)
|
| 60 |
-
# print("Rรฉsultat de la modรฉration :", result)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
core/sound_generation.py
DELETED
|
@@ -1 +0,0 @@
|
|
| 1 |
-
#coding: utf-8
|
|
|
|
|
|
core/speech_to_text.py
DELETED
|
@@ -1,255 +0,0 @@
|
|
| 1 |
-
#coding: utf-8
|
| 2 |
-
# Importation des bibliothรจques nรฉcessaires selon les bonnes pratiques PEP8
|
| 3 |
-
import requests # Pour envoyer des requรชtes HTTP ร l'API
|
| 4 |
-
import json # Pour traiter les rรฉponses JSON de l'API
|
| 5 |
-
from os import getenv
|
| 6 |
-
from pydub import AudioSegment
|
| 7 |
-
from openai import OpenAI
|
| 8 |
-
from io import BytesIO
|
| 9 |
-
#from typing import Any
|
| 10 |
-
#from typing import Dict
|
| 11 |
-
from typing import IO
|
| 12 |
-
from typing import List
|
| 13 |
-
from typing import Optional
|
| 14 |
-
#from typing import Tuple
|
| 15 |
-
from typing import Union
|
| 16 |
-
import os
|
| 17 |
-
import streamlit as st
|
| 18 |
-
|
| 19 |
-
from core.DetectLanguage import detect_language
|
| 20 |
-
|
| 21 |
-
def huggingface_endpoints_stt(
    filepath: Union[str, IO]
) -> str:
    """
    Transcribe an audio file to text through a Hugging Face inference endpoint.

    The endpoint URL is read from HF_WHISPER_ENDPOINT and the bearer token
    from HF_API_TOKEN.

    Args:
        filepath (Union[str, IO]): Path of the audio file, or a file object
            whose ``name`` is that path.

    Returns:
        str: The "text" field of the JSON answer, or a placeholder message
        when that field is absent.

    Raises:
        Exception: If the endpoint answers with a status other than 200.
    """
    file_path = filepath if isinstance(filepath, str) else filepath.name

    # Inference endpoint URL and access token come from the environment.
    API_URL = f"{getenv('HF_WHISPER_ENDPOINT')}"
    headers = {
        "Authorization": f"Bearer {getenv('HF_API_TOKEN')}"
    }

    # Derive the audio content type from the real file extension only;
    # splitting on '.' would pick up dots in directory names or return
    # the whole path when there is no extension.
    ext = os.path.splitext(file_path)[1].lstrip('.').lower()
    if ext == "mp3":
        mime_type = "audio/mpeg"
    elif ext:
        mime_type = f"audio/{ext}"
    else:
        # NOTE(review): generic fallback for extension-less files; confirm
        # the endpoint accepts it.
        mime_type = "application/octet-stream"
    headers["Content-Type"] = mime_type

    # Send the raw audio bytes in a POST request.
    with open(file_path, "rb") as audio:
        response = requests.post(API_URL, headers=headers, data=audio)

    if response.status_code == 200:
        transcription = json.loads(response.content.decode("utf-8"))
        return transcription.get("text", "Pas de transcription disponible.")

    # Any other status is reported with its code and body.
    raise Exception(f"Erreur API: {response.status_code}, {response.text}")
|
| 67 |
-
|
| 68 |
-
|
| 69 |
-
|
| 70 |
-
# ############################################################
|
| 71 |
-
|
| 72 |
-
|
| 73 |
-
def transcribe_audio(
    filepath: Union[str, IO],
    language: Optional[str] = None
) -> str:
    """
    Transcribe an audio file to text.

    The Hugging Face endpoint is tried first; if it raises or returns an
    empty result, OpenAI "whisper-1" is used as a fallback. Errors are
    reported through Streamlit and never propagate to the caller.

    Args:
        filepath (Union[str, IO]): Path of the audio file, or a file object
            whose ``name`` is that path.
        language (Optional[str]): ISO 639-1 code passed to the OpenAI
            fallback when provided.

    Returns:
        str: The transcribed text, or an empty string on error.
    """
    if not filepath:
        st.error("Erreur : Aucun fichier audio fourni")
        return ""

    try:
        path_given = isinstance(filepath, str)

        # Check that the file exists and is accessible
        if path_given and not os.path.exists(filepath):
            raise FileNotFoundError(f"Le fichier {filepath} n'existe pas")

        # Check the file size
        if path_given:
            file_size = os.path.getsize(filepath)
        else:
            # tell() alone is the cursor position, not the size: measure by
            # seeking to the end, then put the cursor back.
            position = filepath.tell()
            try:
                filepath.seek(0, os.SEEK_END)
                file_size = filepath.tell()
                filepath.seek(position)
            except (OSError, ValueError):
                # Non-seekable stream: fall back to the cursor position.
                file_size = position
        if file_size == 0:
            raise ValueError("Le fichier audio est vide")

        # Transcription with Hugging Face
        try:
            transcription = huggingface_endpoints_stt(filepath)
            if transcription:
                return transcription
        except Exception as hf_error:
            st.warning(f"Erreur avec l'endpoint Hugging Face, tentative avec OpenAI : {hf_error}")

        # Transcription with OpenAI as a fallback
        client = OpenAI(api_key=os.getenv("OPENAI_API_KEY"))
        request = {"model": "whisper-1"}
        if language:
            # Only forwarded when set, so None is never sent as a value.
            request["language"] = language
        with open(filepath if path_given else filepath.name, "rb") as audio_file:
            transcription = client.audio.transcriptions.create(
                file=audio_file,
                **request
            )
        return transcription.text

    except (FileNotFoundError, ValueError) as e:
        st.error(f"Erreur : {e}")
        return ""
    except IOError as e:
        st.error(f"Erreur lors de la lecture du fichier audio : {e}")
        return ""
    except Exception as e:
        st.error(f"Une erreur inattendue s'est produite lors de la transcription : {e}")
        return ""
|
| 135 |
-
|
| 136 |
-
|
| 137 |
-
# ############################################################
|
| 138 |
-
|
| 139 |
-
def translate_audio(filepath: Union[str, IO]) -> str:
    """
    Translate an audio file into English with OpenAI "whisper-1".

    The audio is cut into segments, each exported as MP3 and sent to the
    translations endpoint; the translated texts are joined with spaces.

    Args:
        filepath (Union[str, IO]): Path of the audio file, or a file object
            whose ``name`` is that path.

    Returns:
        str: The translated text, or an empty string on error (the error
        is printed).
    """
    max_size_mb = 25
    translated_text = []
    client = OpenAI(api_key=getenv("OPENAI_API_KEY"))
    try:
        with open(filepath if isinstance(filepath, str) else filepath.name, "rb") as f:
            audio = AudioSegment.from_file(f)

        duration_ms = len(audio)
        # Milliseconds of raw audio that fit in max_size_mb, from the byte
        # rate of the decoded audio (frame rate x sample width x channels).
        bytes_per_second = audio.frame_rate * audio.sample_width * audio.channels
        segment_duration_ms = max(
            1,  # range() rejects a zero step
            int((max_size_mb * 1024 * 1024 * 1000) / bytes_per_second)
        )

        for start in range(0, duration_ms, segment_duration_ms):
            end = min(start + segment_duration_ms, duration_ms)
            segment = audio[start:end]

            buffer = BytesIO()
            segment.export(buffer, format="mp3")
            buffer.seek(0)

            translation = client.audio.translations.create(
                model="whisper-1",
                file=("audio.mp3", buffer)
            )
            # Keep the text, not the response object, so join() works.
            translated_text.append(translation.text)

        return " ".join(translated_text)
    except Exception as e:
        print(f"Erreur lors de la traduction de l'audio : {e}")
        return ""
|
| 181 |
-
|
| 182 |
-
|
| 183 |
-
|
| 184 |
-
|
| 185 |
-
|
| 186 |
-
|
| 187 |
-
# ############################################################
|
| 188 |
-
|
| 189 |
-
|
| 190 |
-
class SpeechToText(object):
    """OpenAI-backed helper for preparing audio files for language detection."""

    def __init__(self,
                 api_key: str):
        self.api_key = api_key
        self.client = OpenAI(api_key=self.api_key)

    def aquire_audio(self,
                     filepath: Union[str, IO, List[Union[str, IO]]]):
        """
        Upload audio file(s) and attach them to a new language-detection assistant.

        Creates an "Audio Language Detector" assistant and an
        "Audio Language Detection" vector store, uploads every given file
        to the store, links the store to the assistant and uploads the
        first file on its own.

        Args:
            filepath: A path, a file object (its ``name`` is used as the
                path), or a list mixing both.

        Raises:
            ValueError: If no file is given.

        NOTE(review): the method returns nothing and no thread/run is
        created yet; the original note asks for a detect_language call to
        be added after acquisition. Whether the vector store accepts audio
        files should also be confirmed.
        """
        # Normalise the argument to a list of paths. File objects are
        # recognised by their read() method.
        if isinstance(filepath, str) or hasattr(filepath, "read"):
            sources = [filepath]
        else:
            sources = list(filepath)
        file_paths = [
            f'{item}' if isinstance(item, str) else item.name
            for item in sources
        ]
        if not file_paths:
            raise ValueError("No audio file provided")

        file_streams = []
        try:
            # Open every file first so a bad path fails before any remote
            # resource is created.
            for path in file_paths:
                file_streams.append(open(path, "rb"))

            assistant = self.client.beta.assistants.create(
                name="Audio Language Detector",
                instructions=" ".join([
                    "Act as an language detection function for an audio file.",
                    "You are the assistant designed to detect the language of an audio file.",
                    "This assistant is designed to detect the language of an audio file.",
                    "You receive an audio file as input, and you analyze it to determine the language spoken in the audio.",
                    "The assistant will return the detected language of the audio in ISO 639-1 format.",
                    ""
                ]),
                model="gpt-4o",
                tools=[{"type": "file_search"}]
            )
            vectore_store = self.client.beta.vector_stores.create(
                name="Audio Language Detection"
            )

            self.client.beta.vector_stores.file_batches.upload_and_poll(
                vector_store_id=vectore_store.id,
                files=file_streams
            )
        finally:
            for stream in file_streams:
                stream.close()

        # Update the assistant to use the vector store.
        self.client.beta.assistants.update(
            assistant_id=assistant.id,
            tool_resources={"file_search": {"vector_store_ids": [vectore_store.id]}}
        )

        # Upload the user-provided audio on its own.
        with open(file_paths[0], "rb") as first_audio:
            self.client.files.create(
                file=first_audio,
                purpose="assistants"
            )
        # TODO: create the thread and run that use the uploaded file.
|
| 254 |
-
|
| 255 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
core/text_to_speech.py
DELETED
|
@@ -1,162 +0,0 @@
|
|
| 1 |
-
#coding: utf-8
|
| 2 |
-
|
| 3 |
-
import os
|
| 4 |
-
import tempfile
|
| 5 |
-
|
| 6 |
-
|
| 7 |
-
#from typing import Any
|
| 8 |
-
#from typing import Dict
|
| 9 |
-
#from typing import IO
|
| 10 |
-
#from typing import List
|
| 11 |
-
from typing import Optional
|
| 12 |
-
from typing import Tuple
|
| 13 |
-
#from typing import Union
|
| 14 |
-
from base64 import b64encode
|
| 15 |
-
|
| 16 |
-
from openai import OpenAI
|
| 17 |
-
from pydub import AudioSegment
|
| 18 |
-
import streamlit as st
|
| 19 |
-
|
| 20 |
-
#from dotenv import load_dotenv
|
| 21 |
-
# Charger les variables d'environnement depuis le fichier .env
|
| 22 |
-
#load_dotenv()
|
| 23 |
-
|
| 24 |
-
class openai_tts(object):
|
| 25 |
-
def __init__(self,
|
| 26 |
-
tts_voice: Optional[str] = "nova",
|
| 27 |
-
tts_model: Optional[str] = "tts-1",
|
| 28 |
-
response_format: Optional[str] = "mp3",
|
| 29 |
-
speed: Optional[float] = 1.0
|
| 30 |
-
):
|
| 31 |
-
self.client = None
|
| 32 |
-
self.init_supported_formats__()
|
| 33 |
-
self.init_api_client()
|
| 34 |
-
|
| 35 |
-
if response_format:
|
| 36 |
-
self.set_response_format(response_format)
|
| 37 |
-
if tts_voice:
|
| 38 |
-
self.set_tts_voice(tts_voice)
|
| 39 |
-
if tts_model:
|
| 40 |
-
self.set_tts_model(tts_model)
|
| 41 |
-
if speed:
|
| 42 |
-
self.set_tts_speed(speed)
|
| 43 |
-
|
| 44 |
-
def set_tts_speed(self, speed):
|
| 45 |
-
if not (0.25 <= speed <= 4.0):
|
| 46 |
-
raise ValueError(f"[TTS] - Speed must be between 0.25 and 4.0. Provided value: {speed}")
|
| 47 |
-
else:
|
| 48 |
-
self.speed = speed
|
| 49 |
-
return self
|
| 50 |
-
|
| 51 |
-
def set_tts_voice(self, voice):
|
| 52 |
-
voix_valides = ["alloy", "echo", "fable", "onyx", "nova", "shimmer"]
|
| 53 |
-
if voice not in voix_valides:
|
| 54 |
-
raise ValueError(f"[TTS] - Invalid TTS voice: {voice}. Valid voices are: {', '.join(voix_valides)}.")
|
| 55 |
-
else:
|
| 56 |
-
self.tts_voice = voice
|
| 57 |
-
return self
|
| 58 |
-
|
| 59 |
-
def set_tts_model(self, model):
|
| 60 |
-
if model not in ["tts-1", "tts-1-hd"]:
|
| 61 |
-
raise ValueError(f"[TTS] - Invalid TTS model: {model}. Valid models are 'tts-1' and 'tts-1-hd'.")
|
| 62 |
-
else:
|
| 63 |
-
self.tts_model = model
|
| 64 |
-
return self
|
| 65 |
-
|
| 66 |
-
def init_supported_formats__(self):
|
| 67 |
-
self.supported_formats = [ 'mp3', 'opus', 'aac', 'flac', 'wav', 'pcm' ]
|
| 68 |
-
return self
|
| 69 |
-
|
| 70 |
-
def set_response_format(self, format: str):
|
| 71 |
-
if format not in self.supported_formats:
|
| 72 |
-
raise ValueError(f"[TTS] - Unsupported format: {format}. Supported formats are: {', '.join(self.supported_formats)}")
|
| 73 |
-
else:
|
| 74 |
-
self.response_format = format
|
| 75 |
-
return self
|
| 76 |
-
|
| 77 |
-
def init_api_client(self):
|
| 78 |
-
if not (self.client):
|
| 79 |
-
# OpenAI client configuration with API key
|
| 80 |
-
self.client = OpenAI(api_key=os.getenv("OPENAI_API_KEY"))
|
| 81 |
-
return self
|
| 82 |
-
|
| 83 |
-
def text_to_speech(self,
|
| 84 |
-
input_text: str) -> Tuple[Optional[bytes], float]:
|
| 85 |
-
"""
|
| 86 |
-
Convertit du texte en parole en utilisant l'API OpenAI.
|
| 87 |
-
|
| 88 |
-
Args:
|
| 89 |
-
input_text (str): Le texte à convertir en parole.
|
| 90 |
-
|
| 91 |
-
Returns:
|
| 92 |
-
Dict[str, Union[float, str]]: Un dictionnaire contenant:
|
| 93 |
-
- 'audio_duration' (float): La durée de l'audio en secondes.
|
| 94 |
-
- 'data_bytes' (str): Les données audio encodées en base64.
|
| 95 |
-
"""
|
| 96 |
-
response = self.client.audio.speech.create(
|
| 97 |
-
model=self.tts_model,
|
| 98 |
-
voice=self.tts_voice,
|
| 99 |
-
input=input_text,
|
| 100 |
-
response_format=self.response_format,
|
| 101 |
-
speed=self.speed
|
| 102 |
-
)
|
| 103 |
-
data_output = response.read()
|
| 104 |
-
|
| 105 |
-
tmp_file = tempfile.TemporaryFile()
|
| 106 |
-
tmp_file.write(data_output)
|
| 107 |
-
tmp_file.seek(0)
|
| 108 |
-
audio = AudioSegment.from_file(tmp_file, format=self.response_format)
|
| 109 |
-
duration = len(audio) / 1000
|
| 110 |
-
tmp_file.close()
|
| 111 |
-
|
| 112 |
-
return {
|
| 113 |
-
"audio_duration": duration,
|
| 114 |
-
"data_bytes": b64encode(data_output).decode()
|
| 115 |
-
}
|
| 116 |
-
|
| 117 |
-
|
| 118 |
-
def process_tts_message(text_response: str) -> Tuple[Optional[bytes], Optional[float]]:
|
| 119 |
-
"""
|
| 120 |
-
Convertit un texte en parole en utilisant l'API OpenAI TTS.
|
| 121 |
-
|
| 122 |
-
Args:
|
| 123 |
-
text_response (str): Le texte à convertir en parole.
|
| 124 |
-
|
| 125 |
-
Returns:
|
| 126 |
-
Tuple[Optional[bytes], Optional[float]]: Un tuple contenant les données audio encodées en base64
|
| 127 |
-
et la durée de l'audio, ou (None, None) en cas d'erreur.
|
| 128 |
-
"""
|
| 129 |
-
if not text_response or not isinstance(text_response, str):
|
| 130 |
-
st.error("Erreur : Le texte à convertir est invalide ou vide")
|
| 131 |
-
return None, None
|
| 132 |
-
|
| 133 |
-
try:
|
| 134 |
-
tts = openai_tts(
|
| 135 |
-
tts_voice=st.session_state.tts_voice,
|
| 136 |
-
tts_model="tts-1",
|
| 137 |
-
response_format="mp3",
|
| 138 |
-
speed=1.0
|
| 139 |
-
)
|
| 140 |
-
tts_output_ = tts.text_to_speech(text_response)
|
| 141 |
-
return tts_output_["data_bytes"], tts_output_["audio_duration"]
|
| 142 |
-
|
| 143 |
-
except ValueError as ve:
|
| 144 |
-
# Erreurs de validation (voix invalide, format non supporté, etc.)
|
| 145 |
-
st.error(f"Erreur de configuration TTS : {ve}")
|
| 146 |
-
return None, None
|
| 147 |
-
|
| 148 |
-
except (KeyError, AttributeError) as ke:
|
| 149 |
-
# Erreurs liées aux variables de session manquantes
|
| 150 |
-
st.error("Erreur : Configuration TTS incomplète ou invalide")
|
| 151 |
-
return None, None
|
| 152 |
-
|
| 153 |
-
except Exception as e:
|
| 154 |
-
# Autres erreurs (réseau, API, etc.)
|
| 155 |
-
st.error(f"Une erreur s'est produite lors de la conversion texte-parole : {e}")
|
| 156 |
-
return None, None
|
| 157 |
-
|
| 158 |
-
|
| 159 |
-
if __name__ == "__main__":
|
| 160 |
-
|
| 161 |
-
openai_tts().text_to_speech("Hello, I am an AI assistant. How can I help you?")
|
| 162 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
pages/configuration_ui_lang.py
DELETED
|
@@ -1,128 +0,0 @@
|
|
| 1 |
-
# Standard libraries
|
| 2 |
-
#import base64
|
| 3 |
-
#import io
|
| 4 |
-
import json
|
| 5 |
-
#import os
|
| 6 |
-
#import re
|
| 7 |
-
#import tempfile
|
| 8 |
-
#import time
|
| 9 |
-
#from os import getenv
|
| 10 |
-
from typing import Any
|
| 11 |
-
from typing import Dict
|
| 12 |
-
#from typing import IO
|
| 13 |
-
#from typing import List
|
| 14 |
-
from typing import Optional
|
| 15 |
-
#from typing import Tuple
|
| 16 |
-
#from typing import Union
|
| 17 |
-
|
| 18 |
-
# Third-party libraries
|
| 19 |
-
import streamlit as st
|
| 20 |
-
|
| 21 |
-
|
| 22 |
-
from var_app import __version__
|
| 23 |
-
|
| 24 |
-
LANGUAGES_EMOJI = {
|
| 25 |
-
"Afrikaans": "🇿🇦", "Arabic": "🇸🇦", "Armenian": "🇦🇲", "Azerbaijani": "🇦🇿", "Belarusian": "🇧🇾",
|
| 26 |
-
"Bosnian": "🇧🇦", "Bulgarian": "🇧🇬", "Catalan": "🇪🇸", "Chinese": "🇨🇳", "Croatian": "🇭🇷",
|
| 27 |
-
"Czech": "🇨🇿", "Danish": "🇩🇰", "Dutch": "🇳🇱", "English": "🇬🇧", "Estonian": "🇪🇪",
|
| 28 |
-
"Finnish": "🇫🇮", "French": "🇫🇷", "Galician": "🇪🇸", "German": "🇩🇪", "Greek": "🇬🇷",
|
| 29 |
-
"Hebrew": "🇮🇱", "Hindi": "🇮🇳", "Hungarian": "🇭🇺", "Icelandic": "🇮🇸", "Indonesian": "🇮🇩",
|
| 30 |
-
"Italian": "🇮🇹", "Japanese": "🇯🇵", "Kannada": "🇮🇳", "Kazakh": "🇰🇿", "Korean": "🇰🇷",
|
| 31 |
-
"Latvian": "🇱🇻", "Lithuanian": "🇱🇹", "Macedonian": "🇲🇰", "Malay": "🇲🇾", "Marathi": "🇮🇳",
|
| 32 |
-
"Maori": "🇳🇿", "Nepali": "🇳🇵", "Norwegian": "🇳🇴", "Persian": "🇮🇷", "Polish": "🇵🇱",
|
| 33 |
-
"Portuguese": "🇵🇹", "Romanian": "🇷🇴", "Russian": "🇷🇺", "Serbian": "🇷🇸", "Slovak": "🇸🇰",
|
| 34 |
-
"Slovenian": "🇸🇮", "Spanish": "🇪🇸", "Swahili": "🇰🇪", "Swedish": "🇸🇪", "Tagalog": "🇵🇭",
|
| 35 |
-
"Tamil": "🇮🇳", "Thai": "🇹🇭", "Turkish": "🇹🇷", "Ukrainian": "🇺🇦", "Urdu": "🇵🇰",
|
| 36 |
-
"Vietnamese": "🇻🇳", "Welsh": "🏴󠁧󠁢󠁷󠁬󠁳󠁿"
|
| 37 |
-
}
|
| 38 |
-
|
| 39 |
-
def load_ui_language(file_path: Optional[str] = "ui_lang_support.json") -> Dict[str, Any]:
|
| 40 |
-
"""
|
| 41 |
-
Charge les traductions de l'interface utilisateur à partir d'un fichier JSON.
|
| 42 |
-
|
| 43 |
-
Args:
|
| 44 |
-
file_path (Optional[str]): Chemin vers le fichier JSON contenant les traductions.
|
| 45 |
-
|
| 46 |
-
Returns:
|
| 47 |
-
Dict[str, Any]: Un dictionnaire contenant les traductions de l'interface utilisateur.
|
| 48 |
-
"""
|
| 49 |
-
try:
|
| 50 |
-
with open(file_path, 'r', encoding='utf-8') as file:
|
| 51 |
-
return json.load(file)
|
| 52 |
-
except FileNotFoundError:
|
| 53 |
-
print(f"{get_translation('erreur_fichier_non_trouve')} {file_path}")
|
| 54 |
-
return {}
|
| 55 |
-
except json.JSONDecodeError:
|
| 56 |
-
print(f"{get_translation('erreur_lecture_fichier')} JSON decoding error")
|
| 57 |
-
return {}
|
| 58 |
-
except IOError as e:
|
| 59 |
-
print(f"{get_translation('erreur_lecture_fichier')} {e}")
|
| 60 |
-
return {}
|
| 61 |
-
|
| 62 |
-
def get_translation(key: str) -> str:
|
| 63 |
-
"""
|
| 64 |
-
Obtient la traduction pour une clé donnée basée sur la langue d'interface sélectionnée.
|
| 65 |
-
"""
|
| 66 |
-
lang = st.session_state.get('interface_language', 'English')
|
| 67 |
-
return translations.get(lang, {}).get(key, key)
|
| 68 |
-
|
| 69 |
-
# Dictionary to store translations
|
| 70 |
-
translations = load_ui_language()
|
| 71 |
-
|
| 72 |
-
def language_selection_page():
|
| 73 |
-
st.set_page_config(initial_sidebar_state="collapsed")
|
| 74 |
-
#st.set_page_config(page_icon="๐น")
|
| 75 |
-
#st.set_page_config(layout="wide")
|
| 76 |
-
# Fonction de rappel pour mettre à jour la langue temporaire lorsque la sélection change
|
| 77 |
-
def update_temp_language():
|
| 78 |
-
st.session_state['temp_interface_language'] = st.session_state['language_selector']
|
| 79 |
-
|
| 80 |
-
# Initialiser la langue temporaire si elle n'existe pas
|
| 81 |
-
if 'temp_interface_language' not in st.session_state:
|
| 82 |
-
st.session_state['temp_interface_language'] = st.session_state.get('interface_language', 'English')
|
| 83 |
-
|
| 84 |
-
|
| 85 |
-
# Sélecteur de langue avec un callback pour mettre à jour la langue temporaire
|
| 86 |
-
with st.container(
|
| 87 |
-
border=True,
|
| 88 |
-
key='lang_choices_container',
|
| 89 |
-
height=None
|
| 90 |
-
):
|
| 91 |
-
|
| 92 |
-
TITRE_LANGSELMENU=translations[st.session_state['temp_interface_language']].get('selection_de_la_langue', 'Language Selection')
|
| 93 |
-
# Titre basé sur la langue temporaire
|
| 94 |
-
st.header(
|
| 95 |
-
f":blue[{TITRE_LANGSELMENU}]"
|
| 96 |
-
,divider=True)
|
| 97 |
-
|
| 98 |
-
selecteur_du_choix, boutton_de_validation = st.columns(2,
|
| 99 |
-
vertical_alignment='top',
|
| 100 |
-
gap="small")
|
| 101 |
-
|
| 102 |
-
selected_language = selecteur_du_choix.selectbox(
|
| 103 |
-
translations[st.session_state['temp_interface_language']].get('choix_selection_langue', 'Choose the interface language'),
|
| 104 |
-
options=list(translations.keys()),
|
| 105 |
-
index=list(translations.keys()).index(st.session_state['temp_interface_language']),
|
| 106 |
-
format_func=lambda lang: f"{LANGUAGES_EMOJI.get(lang, '')} {lang}",
|
| 107 |
-
key='language_selector',
|
| 108 |
-
on_change=update_temp_language,
|
| 109 |
-
label_visibility="collapsed"
|
| 110 |
-
)
|
| 111 |
-
|
| 112 |
-
# Bouton de confirmation avec traduction
|
| 113 |
-
if boutton_de_validation.button(
|
| 114 |
-
translations[st.session_state['temp_interface_language']].get(
|
| 115 |
-
'confirmer', 'Confirm'),
|
| 116 |
-
key='confirm_button_ui_lang_choice',
|
| 117 |
-
type="secondary",
|
| 118 |
-
use_container_width=True,
|
| 119 |
-
disabled = False
|
| 120 |
-
):
|
| 121 |
-
st.session_state.interface_language = selected_language
|
| 122 |
-
st.session_state.init_launch_app = False
|
| 123 |
-
st.switch_page(st.Page("pages/main.py", title="Main page"))
|
| 124 |
-
|
| 125 |
-
language_selection_page()
|
| 126 |
-
|
| 127 |
-
|
| 128 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
pages/main.py
DELETED
|
@@ -1,675 +0,0 @@
|
|
| 1 |
-
# Standard libraries
|
| 2 |
-
#import base64
|
| 3 |
-
import io
|
| 4 |
-
#import json
|
| 5 |
-
import os
|
| 6 |
-
import uuid
|
| 7 |
-
#import re
|
| 8 |
-
import tempfile
|
| 9 |
-
import time
|
| 10 |
-
from datetime import datetime
|
| 11 |
-
#from os import getenv
|
| 12 |
-
#from typing import Any
|
| 13 |
-
#from typing import Dict
|
| 14 |
-
#from typing import IO
|
| 15 |
-
from typing import List
|
| 16 |
-
from typing import Optional
|
| 17 |
-
from typing import Tuple
|
| 18 |
-
#from typing import Union
|
| 19 |
-
from typing import AnyStr
|
| 20 |
-
#from io import BytesIO
|
| 21 |
-
#from copy import deepcopy
|
| 22 |
-
import hashlib
|
| 23 |
-
|
| 24 |
-
# Third-party libraries
|
| 25 |
-
import requests
|
| 26 |
-
import streamlit as st
|
| 27 |
-
#import streamlit.components.v1 as components
|
| 28 |
-
|
| 29 |
-
#from audiorecorder import audiorecorder
|
| 30 |
-
from openai import OpenAI
|
| 31 |
-
from pydub import AudioSegment
|
| 32 |
-
import warnings
|
| 33 |
-
# Ignore DeprecationWarning
|
| 34 |
-
warnings.filterwarnings("ignore", category=DeprecationWarning)
|
| 35 |
-
|
| 36 |
-
from dotenv import load_dotenv
|
| 37 |
-
# Charger les variables d'environnement depuis le fichier .env
|
| 38 |
-
load_dotenv()
|
| 39 |
-
|
| 40 |
-
from var_app import __version__
|
| 41 |
-
from var_app import LANGUAGES_EMOJI
|
| 42 |
-
from var_app import SUPPORTED_LANGUAGES
|
| 43 |
-
from var_app import CHAT_FILES_UPLOAD_ALLOWED_TYPES
|
| 44 |
-
from core.core import translations
|
| 45 |
-
from core.core import get_translation
|
| 46 |
-
from core.converter import convert_iso6391_to_language_name
|
| 47 |
-
from core.converter import convert_language_name_to_iso6391
|
| 48 |
-
from core.files import read_file
|
| 49 |
-
from core.text_to_speech import openai_tts
|
| 50 |
-
from core.DetectLanguage import detect_language
|
| 51 |
-
from core.speech_to_text import huggingface_endpoints_stt
|
| 52 |
-
from core.speech_to_text import transcribe_audio
|
| 53 |
-
from core.audio_files import concatenate_audio_files
|
| 54 |
-
from core.audio_files import split_audio
|
| 55 |
-
from core.text_to_speech import process_tts_message
|
| 56 |
-
from core.files import load_ui_language
|
| 57 |
-
from core.core import process_message
|
| 58 |
-
from core.core import init_process_mode
|
| 59 |
-
from core.moderation import api_moderation_openai_text
|
| 60 |
-
from core.audio_isolation import isolate_audio
|
| 61 |
-
|
| 62 |
-
def init_langs_for_processing(
|
| 63 |
-
target_language: Optional[AnyStr] = "en",
|
| 64 |
-
interface_language: Optional[AnyStr] = "English",
|
| 65 |
-
language_detected: Optional[AnyStr] = None
|
| 66 |
-
) -> Tuple[str, str]:
|
| 67 |
-
# Initialisation du mode de traitement pour la langue cible actuelle
|
| 68 |
-
system_prompt, operation_prompt = init_process_mode(
|
| 69 |
-
from_lang = (
|
| 70 |
-
language_detected if "language_detected" in language_detected else convert_language_name_to_iso6391(
|
| 71 |
-
interface_language
|
| 72 |
-
)
|
| 73 |
-
),
|
| 74 |
-
to_lang = target_language
|
| 75 |
-
)
|
| 76 |
-
return (system_prompt, operation_prompt)
|
| 77 |
-
|
| 78 |
-
#def detection_langue_du_message_utilisateur(
|
| 79 |
-
# user_input: str,
|
| 80 |
-
# detect_lang_from_text: Optional[bool] = True,
|
| 81 |
-
# ):
|
| 82 |
-
# # Traitement du message texte de l'utilisateur
|
| 83 |
-
# if (detect_lang_from_text):
|
| 84 |
-
# language_detected = detect_language(
|
| 85 |
-
# input_text = user_input,
|
| 86 |
-
# temperature = 0.01,
|
| 87 |
-
# context_window = 512,
|
| 88 |
-
# model="gpt-4o-mini"
|
| 89 |
-
# )
|
| 90 |
-
# return {
|
| 91 |
-
# "text":f"{user_input}",
|
| 92 |
-
# "language": language_detected.strip()
|
| 93 |
-
# }
|
| 94 |
-
# else:
|
| 95 |
-
# return {
|
| 96 |
-
# "text":f"{user_input}",
|
| 97 |
-
# "language": "default"
|
| 98 |
-
# }
|
| 99 |
-
|
| 100 |
-
def user_message_moderation_check(
|
| 101 |
-
user_message: Optional[str] = None ):
|
| 102 |
-
# Appeler la fonction de modération
|
| 103 |
-
moderation_result = api_moderation_openai_text(user_message)
|
| 104 |
-
if moderation_result.get("flagged"):
|
| 105 |
-
st.error("Votre message a été jugé inapproprié et ne peut pas être traité.")
|
| 106 |
-
return # Arrêter le traitement si le message est inapproprié
|
| 107 |
-
elif "error" in moderation_result:
|
| 108 |
-
st.error(moderation_result["error"])
|
| 109 |
-
return # Gérer les erreurs de modération
|
| 110 |
-
|
| 111 |
-
def hash_file(file):
|
| 112 |
-
hasher = hashlib.md5()
|
| 113 |
-
buf = file.read()
|
| 114 |
-
hasher.update(buf)
|
| 115 |
-
file.seek(0)
|
| 116 |
-
return hasher.hexdigest()
|
| 117 |
-
|
| 118 |
-
def callback_change_edited_text(key, value, modified_text):
|
| 119 |
-
if value["type"] in ["txt"]:
|
| 120 |
-
st.session_state.changed_uploaded_files[key]["bytes_data"] = st.session_state[modified_text].encode()
|
| 121 |
-
elif value["type"] in ["wav", "mp3"]:
|
| 122 |
-
st.session_state.changed_uploaded_files[key]["audio_transcription"] = st.session_state[modified_text]
|
| 123 |
-
|
| 124 |
-
def save_attachment(attachment):
|
| 125 |
-
"""Sauvegarde la pièce jointe et retourne le chemin."""
|
| 126 |
-
# Créer un dossier pour les pièces jointes s'il n'existe pas
|
| 127 |
-
attachments_dir = 'attachments'
|
| 128 |
-
os.makedirs(attachments_dir, exist_ok=True)
|
| 129 |
-
|
| 130 |
-
# Générer un nom de fichier unique
|
| 131 |
-
file_extension = os.path.splitext(attachment.name)[1]
|
| 132 |
-
filename = f"{uuid.uuid4()}{file_extension}"
|
| 133 |
-
file_path = os.path.join(attachments_dir, filename)
|
| 134 |
-
|
| 135 |
-
# Sauvegarder le fichier
|
| 136 |
-
with open(file_path, 'wb') as f:
|
| 137 |
-
f.write(attachment.getbuffer())
|
| 138 |
-
|
| 139 |
-
return file_path
|
| 140 |
-
|
| 141 |
-
# Au début du fichier, après les imports
|
| 142 |
-
st.set_page_config(
|
| 143 |
-
page_title=f"DEMORRHA - (v{__version__})",
|
| 144 |
-
page_icon="๐น",
|
| 145 |
-
layout="wide",
|
| 146 |
-
initial_sidebar_state="collapsed"
|
| 147 |
-
)
|
| 148 |
-
|
| 149 |
-
@st.dialog("STT Settings")
|
| 150 |
-
def stt_settings(state__stt_voice_isolation):
|
| 151 |
-
with st.expander(f"{get_translation('parametres_stt')}",
|
| 152 |
-
expanded=True,
|
| 153 |
-
icon="๐ค"):
|
| 154 |
-
set__stt_voice_isolation = st.checkbox(
|
| 155 |
-
get_translation("isolation_voix"),
|
| 156 |
-
value=state__stt_voice_isolation
|
| 157 |
-
)
|
| 158 |
-
|
| 159 |
-
if st.button("Submit"):
|
| 160 |
-
st.session_state.stt_voice_isolation = set__stt_voice_isolation
|
| 161 |
-
st.rerun()
|
| 162 |
-
|
| 163 |
-
@st.dialog("TTS Settings")
|
| 164 |
-
def tts_settings(name__tts_voice,
|
| 165 |
-
state__tts_with_text,
|
| 166 |
-
state__tts_with_audio,
|
| 167 |
-
state__autoplay_tts):
|
| 168 |
-
with st.expander(f"{get_translation('parametres_tts')}",
|
| 169 |
-
expanded=True,
|
| 170 |
-
icon="๐"):
|
| 171 |
-
set__tts_voice = st.selectbox(
|
| 172 |
-
get_translation("choix_voix_tts"),
|
| 173 |
-
options=["alloy", "echo", "fable", "onyx", "nova", "shimmer"],
|
| 174 |
-
index=list(["alloy", "echo", "fable", "onyx", "nova", "shimmer"]).index(name__tts_voice)
|
| 175 |
-
)
|
| 176 |
-
set__tts_with_text = st.checkbox(
|
| 177 |
-
get_translation("activer_tts_texte"),
|
| 178 |
-
value=state__tts_with_text
|
| 179 |
-
)
|
| 180 |
-
set__tts_with_audio = st.checkbox(
|
| 181 |
-
get_translation("activer_tts_audio"),
|
| 182 |
-
value=state__tts_with_audio
|
| 183 |
-
)
|
| 184 |
-
set__autoplay_tts = st.checkbox(
|
| 185 |
-
get_translation("lecture_auto_tts"),
|
| 186 |
-
value=state__autoplay_tts
|
| 187 |
-
)
|
| 188 |
-
|
| 189 |
-
if st.button("Submit"):
|
| 190 |
-
st.session_state.autoplay_tts = set__autoplay_tts
|
| 191 |
-
st.session_state.enable_tts_for_input_from_audio_record = set__tts_with_audio
|
| 192 |
-
st.session_state.enable_tts_for_input_from_text_field = set__tts_with_text
|
| 193 |
-
st.session_state.tts_voice = set__tts_voice
|
| 194 |
-
st.rerun()
|
| 195 |
-
|
| 196 |
-
@st.fragment
|
| 197 |
-
def recorder_released():
|
| 198 |
-
if "audio_list" not in st.session_state:
|
| 199 |
-
st.session_state.audio_list = []
|
| 200 |
-
|
| 201 |
-
if "rec_widget" in st.session_state:
|
| 202 |
-
if st.session_state.rec_widget:
|
| 203 |
-
audio_recorded = True
|
| 204 |
-
else:
|
| 205 |
-
audio_recorded = False
|
| 206 |
-
|
| 207 |
-
if audio_recorded:
|
| 208 |
-
audio = AudioSegment.from_wav(io.BytesIO(st.session_state.rec_widget.getvalue()))
|
| 209 |
-
st.write(f"Frame rate: {audio.frame_rate}, Frame width: {audio.frame_width}, Duration: {audio.duration_seconds} seconds")
|
| 210 |
-
|
| 211 |
-
if st.session_state.stt_voice_isolation:
|
| 212 |
-
# Isoler l'audio ici
|
| 213 |
-
audio = isolate_audio(audio)
|
| 214 |
-
|
| 215 |
-
# if not st.session_state.language_detected:
|
| 216 |
-
# # 1. Verifie si audio.duration_seconds est superieur a 600 secondes (10 minutes)
|
| 217 |
-
# if audio.duration_seconds > 600:
|
| 218 |
-
# # PyDub handles time in milliseconds
|
| 219 |
-
# ten_minutes = 10 * 60 * 1000
|
| 220 |
-
# first_ten_minutes_audio = audio[:ten_minutes]
|
| 221 |
-
# else:
|
| 222 |
-
# # less than ten minutes ... nervermind, the name of this variable is
|
| 223 |
-
# first_ten_minutes_audio = deepcopy(audio)
|
| 224 |
-
#
|
| 225 |
-
# with tempfile.NamedTemporaryFile(suffix=".mp3", delete=False) as tmp_extract:
|
| 226 |
-
# first_ten_minutes_audio.export(tmp_extract, format="mp3")
|
| 227 |
-
# tmp_extract.close()
|
| 228 |
-
#
|
| 229 |
-
# # il faut transcrire sans specifier l'argument language dans la fonction transcribe_audio
|
| 230 |
-
# # ensuite on pourra utiliser la fonction detect_language pour detecter la langue du texte transcrit
|
| 231 |
-
#
|
| 232 |
-
# # Transcrire les 10 premiers minutes audio en texte
|
| 233 |
-
# st.session_state.language_detected = detect_language(
|
| 234 |
-
# input_text = transcribe_audio(tmp_extract),
|
| 235 |
-
# temperature = 0.2,
|
| 236 |
-
# context_window = 512,
|
| 237 |
-
# model = "gpt-4o-mini"
|
| 238 |
-
# )
|
| 239 |
-
# first_ten_minutes_audio = AudioSegment.empty()
|
| 240 |
-
#
|
| 241 |
-
# st.markdown(
|
| 242 |
-
# f"- {get_translation('langue_detectee')} {convert_iso6391_to_language_name(st.session_state.language_detected)}"
|
| 243 |
-
# )
|
| 244 |
-
#
|
| 245 |
-
#
|
| 246 |
-
# ##############################################################
|
| 247 |
-
|
| 248 |
-
try:
|
| 249 |
-
with tempfile.NamedTemporaryFile(suffix=".mp3", delete=False) as tmp_file:
|
| 250 |
-
audio.export(tmp_file, format="mp3")
|
| 251 |
-
tmp_file.close()
|
| 252 |
-
|
| 253 |
-
# Transcrire l'audio en texte
|
| 254 |
-
#st.session_state.transcription = transcribe_audio(
|
| 255 |
-
# tmp_file,
|
| 256 |
-
# language=convert_language_name_to_iso6391(st.session_state.interface_language)
|
| 257 |
-
#)
|
| 258 |
-
st.session_state.transcription = huggingface_endpoints_stt(tmp_file)
|
| 259 |
-
audio = AudioSegment.empty()
|
| 260 |
-
|
| 261 |
-
st.markdown(
|
| 262 |
-
f"๐ {get_translation('langue_interface')} {st.session_state.interface_language}({convert_language_name_to_iso6391(st.session_state.interface_language)})"
|
| 263 |
-
)
|
| 264 |
-
|
| 265 |
-
st.markdown(
|
| 266 |
-
f"๐ค {get_translation('transcription_audio')} {st.session_state.transcription}"
|
| 267 |
-
)
|
| 268 |
-
|
| 269 |
-
st.session_state.audio_list = []
|
| 270 |
-
for cursor_selected_lang in st.session_state.selected_languages:
|
| 271 |
-
st.session_state.target_language = cursor_selected_lang["iso-639-1"]
|
| 272 |
-
st.session_state.full_response = ""
|
| 273 |
-
|
| 274 |
-
|
| 275 |
-
# Initialisation du mode de traitement pour la langue cible actuelle
|
| 276 |
-
st.session_state.system_prompt, st.session_state.operation_prompt = init_langs_for_processing(
|
| 277 |
-
target_language = st.session_state.target_language,
|
| 278 |
-
interface_language = st.session_state.interface_language,
|
| 279 |
-
language_detected = st.session_state.interface_language
|
| 280 |
-
)
|
| 281 |
-
|
| 282 |
-
with st.chat_message("assistant", avatar="๐ป"):
|
| 283 |
-
message_placeholder = st.empty()
|
| 284 |
-
st.session_state.response_generator = process_message(
|
| 285 |
-
st.session_state.transcription,
|
| 286 |
-
st.session_state.operation_prompt,
|
| 287 |
-
st.session_state.system_prompt
|
| 288 |
-
)
|
| 289 |
-
|
| 290 |
-
for response_chunk in st.session_state.response_generator:
|
| 291 |
-
message_placeholder.markdown(response_chunk)
|
| 292 |
-
st.session_state.end_response = st.session_state.response_generator.close()
|
| 293 |
-
|
| 294 |
-
if st.session_state.full_response != "":
|
| 295 |
-
message_placeholder.markdown(st.session_state.full_response)
|
| 296 |
-
if st.session_state.enable_tts_for_input_from_audio_record:
|
| 297 |
-
st.session_state.tts_audio, st.session_state.tts_duration = process_tts_message(
|
| 298 |
-
st.session_state.full_response
|
| 299 |
-
)
|
| 300 |
-
|
| 301 |
-
if st.session_state.tts_audio:
|
| 302 |
-
st.session_state.audio_list.append(
|
| 303 |
-
( st.session_state.tts_audio,
|
| 304 |
-
st.session_state.tts_duration )
|
| 305 |
-
)
|
| 306 |
-
else:
|
| 307 |
-
pass
|
| 308 |
-
|
| 309 |
-
if st.session_state.audio_list:
|
| 310 |
-
st.session_state.final_audio = concatenate_audio_files(st.session_state.audio_list)
|
| 311 |
-
|
| 312 |
-
with st.container(border=True):
|
| 313 |
-
|
| 314 |
-
# Générer un nom de fichier unique
|
| 315 |
-
st.session_state.timestamp = time.strftime("%Y%m%d-%H%M%S")
|
| 316 |
-
st.session_state.langues = "_".join([lang["iso-639-1"] for lang in st.session_state.selected_languages])
|
| 317 |
-
st.session_state.nom_fichier = f"reponse_audio_{st.session_state.langues}_{st.session_state.timestamp}.mp3"
|
| 318 |
-
|
| 319 |
-
st.audio(st.session_state.final_audio,
|
| 320 |
-
format="audio/mpeg",
|
| 321 |
-
autoplay=st.session_state.autoplay_tts)
|
| 322 |
-
|
| 323 |
-
st.download_button(
|
| 324 |
-
label=f"๐ฅ {get_translation('telecharger_audio')}",
|
| 325 |
-
data=st.session_state.final_audio,
|
| 326 |
-
file_name=st.session_state.nom_fichier,
|
| 327 |
-
mime="audio/mpeg",
|
| 328 |
-
use_container_width=True,
|
| 329 |
-
type="primary",
|
| 330 |
-
key=f"download_button_{st.session_state.langues}_{st.session_state.timestamp}",
|
| 331 |
-
)
|
| 332 |
-
|
| 333 |
-
except Exception as e:
|
| 334 |
-
st.error(f"[AUDIO] - {get_translation('erreur_importation_audio')}: {str(e)}")
|
| 335 |
-
|
| 336 |
-
def main_page():
|
| 337 |
-
"""Page principale de l'application."""
|
| 338 |
-
|
| 339 |
-
if "audio_list" not in st.session_state:
|
| 340 |
-
st.session_state.audio_list = []
|
| 341 |
-
|
| 342 |
-
if "ui_chat_input_disabled" not in st.session_state:
|
| 343 |
-
st.session_state.ui_chat_input_disabled = False
|
| 344 |
-
|
| 345 |
-
if "ui_audio_input_disabled" not in st.session_state:
|
| 346 |
-
st.session_state.ui_audio_input_disabled = False
|
| 347 |
-
|
| 348 |
-
if "ui_filesuploader_disabled" not in st.session_state:
|
| 349 |
-
st.session_state.ui_filesuploader_disabled = False
|
| 350 |
-
|
| 351 |
-
# Dictionnaire pour stocker les fichiers modifiรฉs
|
| 352 |
-
if 'changed_uploaded_files' not in st.session_state:
|
| 353 |
-
st.session_state.changed_uploaded_files = {}
|
| 354 |
-
|
| 355 |
-
# Dictionnaire pour stocker le contenu modifiรฉ des fichiers
|
| 356 |
-
if 'edited_texts' not in st.session_state:
|
| 357 |
-
st.session_state.edited_texts = {}
|
| 358 |
-
|
| 359 |
-
# Liste pour stocker les fichiers audio
|
| 360 |
-
if 'audio_files' not in st.session_state:
|
| 361 |
-
st.session_state.audio_files = []
|
| 362 |
-
|
| 363 |
-
# Initialisation des variables d'รฉtat de session
|
| 364 |
-
if "ui_loaded" not in st.session_state:
|
| 365 |
-
st.session_state["ui_loaded"] = False
|
| 366 |
-
|
| 367 |
-
if "language_detected" not in st.session_state:
|
| 368 |
-
st.session_state["language_detected"] = None
|
| 369 |
-
|
| 370 |
-
if "process_mode" not in st.session_state:
|
| 371 |
-
st.session_state["process_mode"] = "translation"
|
| 372 |
-
|
| 373 |
-
if "target_language" not in st.session_state:
|
| 374 |
-
st.session_state.target_language = "en"
|
| 375 |
-
|
| 376 |
-
if "selected_languages" not in st.session_state:
|
| 377 |
-
st.session_state.selected_languages = [
|
| 378 |
-
{"language": "English", "iso-639-1": "en"}
|
| 379 |
-
]
|
| 380 |
-
|
| 381 |
-
if "interface_language_select" not in st.session_state:
|
| 382 |
-
st.session_state.interface_language_select = "English" # Langue par dรฉfaut
|
| 383 |
-
|
| 384 |
-
if "stt_voice_isolation" not in st.session_state:
|
| 385 |
-
st.session_state["stt_voice_isolation"] = False
|
| 386 |
-
|
| 387 |
-
if "enable_tts_for_input_from_audio_record" not in st.session_state:
|
| 388 |
-
st.session_state["enable_tts_for_input_from_audio_record"] = False
|
| 389 |
-
|
| 390 |
-
if "autoplay_tts" not in st.session_state:
|
| 391 |
-
st.session_state["autoplay_tts"] = False
|
| 392 |
-
|
| 393 |
-
if "enable_tts_for_input_from_text_field" not in st.session_state:
|
| 394 |
-
st.session_state["enable_tts_for_input_from_text_field"] = False
|
| 395 |
-
|
| 396 |
-
if "tts_voice" not in st.session_state:
|
| 397 |
-
st.session_state["tts_voice"] = "onyx"
|
| 398 |
-
|
| 399 |
-
# Initialisation de l'historique des messages avec le prompt systรจme
|
| 400 |
-
if "messages" not in st.session_state:
|
| 401 |
-
st.session_state.messages = []
|
| 402 |
-
|
| 403 |
-
def set_session_selected_languages(
|
| 404 |
-
selected_language_names: Optional[ List[ AnyStr ] ] = ["English"]
|
| 405 |
-
) -> None:
|
| 406 |
-
st.session_state.selected_languages = [
|
| 407 |
-
{"language": lang, "iso-639-1": convert_language_name_to_iso6391(lang)}
|
| 408 |
-
for lang in selected_language_names
|
| 409 |
-
]
|
| 410 |
-
|
| 411 |
-
def on_languages_change() -> None:
|
| 412 |
-
"""Fonction de rappel pour le changement de langue(s) de destination."""
|
| 413 |
-
selected_language_names: List[str] = st.session_state.language_selector
|
| 414 |
-
set_session_selected_languages(selected_language_names)
|
| 415 |
-
|
| 416 |
-
# Configuration de la barre latรฉrale
|
| 417 |
-
with st.sidebar:
|
| 418 |
-
st.logo("img/logo_2.png", icon_image="img/logo_2.png")
|
| 419 |
-
st.header(get_translation("sidebar_titre"))
|
| 420 |
-
|
| 421 |
-
st.write(f"#### Settings")
|
| 422 |
-
if st.button(f"Speech-To-Text"):
|
| 423 |
-
stt_settings(state__stt_voice_isolation=st.session_state.stt_voice_isolation)
|
| 424 |
-
|
| 425 |
-
if st.button(f"Text-To-Speech"):
|
| 426 |
-
tts_settings(
|
| 427 |
-
name__tts_voice = st.session_state.tts_voice,
|
| 428 |
-
state__tts_with_text = st.session_state.enable_tts_for_input_from_text_field,
|
| 429 |
-
state__tts_with_audio = st.session_state.enable_tts_for_input_from_audio_record,
|
| 430 |
-
state__autoplay_tts = st.session_state.autoplay_tts
|
| 431 |
-
)
|
| 432 |
-
|
| 433 |
-
with st.expander(f"{get_translation('a_propos')}",
|
| 434 |
-
expanded=False,
|
| 435 |
-
icon="โน๏ธ"):
|
| 436 |
-
st.subheader(f"version: {__version__}")
|
| 437 |
-
st.info(get_translation("info_app"))
|
| 438 |
-
|
| 439 |
-
with st.expander(f"{get_translation('selection_langue')}",
|
| 440 |
-
expanded=True,
|
| 441 |
-
icon="๐"):
|
| 442 |
-
# Conteneur pour la sรฉlection de langue
|
| 443 |
-
# Sรฉlection multiple des langues de destination
|
| 444 |
-
st.multiselect(
|
| 445 |
-
label=get_translation("langues_destination"),
|
| 446 |
-
placeholder=get_translation("placeholder_langues"),
|
| 447 |
-
options=SUPPORTED_LANGUAGES,
|
| 448 |
-
default=["English"],
|
| 449 |
-
key="language_selector",
|
| 450 |
-
max_selections=4,
|
| 451 |
-
on_change=on_languages_change,
|
| 452 |
-
format_func=lambda lang: f"{LANGUAGES_EMOJI.get(lang, '')} {lang}"
|
| 453 |
-
)
|
| 454 |
-
|
| 455 |
-
if st.session_state.get('show_report_form', False):
|
| 456 |
-
# show_report_form()
|
| 457 |
-
pass
|
| 458 |
-
else:
|
| 459 |
-
with st.container(border=True):
|
| 460 |
-
|
| 461 |
-
chat_input_tabs1, chat_input_tabs2, chat_input_tabs3 = st.tabs(["text_input", "audio_input", "file_upload_input"])
|
| 462 |
-
|
| 463 |
-
st.session_state.ui_chat_input_disabled = False
|
| 464 |
-
st.session_state.ui_audio_input_disabled = False
|
| 465 |
-
st.session_state.ui_filesuploader_disabled = False
|
| 466 |
-
|
| 467 |
-
|
| 468 |
-
with chat_input_tabs1:
|
| 469 |
-
with st.container(border=True):
|
| 470 |
-
# Interface utilisateur pour le chat textuel
|
| 471 |
-
st.session_state.user_input = st.chat_input(
|
| 472 |
-
get_translation("entrez_message"),
|
| 473 |
-
disabled=st.session_state.ui_chat_input_disabled
|
| 474 |
-
)
|
| 475 |
-
|
| 476 |
-
if st.session_state.user_input:
|
| 477 |
-
# Dรฉsactiver temporairement l'interface pendant le traitement
|
| 478 |
-
st.session_state.ui_chat_input_disabled = True
|
| 479 |
-
st.session_state.ui_audio_input_disabled = True
|
| 480 |
-
st.session_state.ui_filesuploader_disabled = True
|
| 481 |
-
|
| 482 |
-
try:
|
| 483 |
-
# Vรฉrification de la modรฉration
|
| 484 |
-
user_message_moderation_check(st.session_state.user_input)
|
| 485 |
-
|
| 486 |
-
# Dรฉtection de la langue du message
|
| 487 |
-
language_detected = detect_language(
|
| 488 |
-
input_text=st.session_state.user_input,
|
| 489 |
-
temperature=0.01,
|
| 490 |
-
context_window=512,
|
| 491 |
-
model="gpt-4o-mini"
|
| 492 |
-
)
|
| 493 |
-
|
| 494 |
-
# Rรฉinitialiser l'รฉtat prรฉcรฉdent
|
| 495 |
-
st.session_state.full_response = ""
|
| 496 |
-
st.session_state.audio_list = []
|
| 497 |
-
|
| 498 |
-
with st.chat_message("user", avatar="๐ค"):
|
| 499 |
-
timestamp = datetime.now().strftime("%Y-%m-%d %H:%M:%S")
|
| 500 |
-
st.write(timestamp)
|
| 501 |
-
with st.container(border=True):
|
| 502 |
-
st.subheader(f"๐ lang: {convert_iso6391_to_language_name(language_detected)}")
|
| 503 |
-
st.markdown(st.session_state.user_input)
|
| 504 |
-
|
| 505 |
-
# Traitement pour chaque langue sรฉlectionnรฉe
|
| 506 |
-
for lang in st.session_state.selected_languages:
|
| 507 |
-
st.session_state.target_language = lang["iso-639-1"]
|
| 508 |
-
|
| 509 |
-
# Initialisation du mode de traitement avec la langue dรฉtectรฉe
|
| 510 |
-
st.session_state.system_prompt, st.session_state.operation_prompt = init_langs_for_processing(
|
| 511 |
-
target_language=st.session_state.target_language,
|
| 512 |
-
interface_language=st.session_state.interface_language,
|
| 513 |
-
language_detected=language_detected
|
| 514 |
-
)
|
| 515 |
-
|
| 516 |
-
with st.chat_message("assistant", avatar="๐ป"):
|
| 517 |
-
with st.status(f"Processing response in {lang['language']}...", expanded=True) as status:
|
| 518 |
-
message_placeholder = st.empty()
|
| 519 |
-
st.session_state.response_generator = process_message(
|
| 520 |
-
st.session_state.user_input,
|
| 521 |
-
st.session_state.operation_prompt,
|
| 522 |
-
st.session_state.system_prompt
|
| 523 |
-
)
|
| 524 |
-
|
| 525 |
-
full_response = ""
|
| 526 |
-
for response_chunk in st.session_state.response_generator:
|
| 527 |
-
full_response += response_chunk
|
| 528 |
-
message_placeholder.markdown(full_response)
|
| 529 |
-
|
| 530 |
-
st.session_state.response_generator.close()
|
| 531 |
-
st.session_state.full_response = full_response
|
| 532 |
-
|
| 533 |
-
# Gรฉnรฉrer l'audio si TTS est activรฉ
|
| 534 |
-
if st.session_state.enable_tts_for_input_from_text_field:
|
| 535 |
-
status.update(label=f"Generating audio in {lang['language']}...")
|
| 536 |
-
tts_audio, tts_duration = process_tts_message(
|
| 537 |
-
full_response
|
| 538 |
-
)
|
| 539 |
-
if tts_audio:
|
| 540 |
-
st.session_state.audio_list.append(
|
| 541 |
-
(tts_audio, tts_duration)
|
| 542 |
-
)
|
| 543 |
-
status.update(label="Done!", state="complete")
|
| 544 |
-
|
| 545 |
-
# Gรฉnรฉrer l'audio final si nรฉcessaire
|
| 546 |
-
if st.session_state.audio_list:
|
| 547 |
-
with st.container(border=True):
|
| 548 |
-
st.session_state.final_audio = concatenate_audio_files(st.session_state.audio_list)
|
| 549 |
-
|
| 550 |
-
# Gรฉnรฉrer un nom de fichier unique
|
| 551 |
-
st.session_state.timestamp = time.strftime("%Y%m%d-%H%M%S")
|
| 552 |
-
st.session_state.langues = "_".join([lang["iso-639-1"] for lang in st.session_state.selected_languages])
|
| 553 |
-
st.session_state.nom_fichier = f"reponse_audio_{st.session_state.langues}_{st.session_state.timestamp}.mp3"
|
| 554 |
-
|
| 555 |
-
st.audio(
|
| 556 |
-
st.session_state.final_audio,
|
| 557 |
-
format="audio/mpeg",
|
| 558 |
-
autoplay=st.session_state.autoplay_tts
|
| 559 |
-
)
|
| 560 |
-
|
| 561 |
-
st.download_button(
|
| 562 |
-
label=f"๐ฅ {get_translation('telecharger_audio')}",
|
| 563 |
-
data=st.session_state.final_audio,
|
| 564 |
-
file_name=st.session_state.nom_fichier,
|
| 565 |
-
mime="audio/mpeg",
|
| 566 |
-
use_container_width=True,
|
| 567 |
-
type="primary",
|
| 568 |
-
key=f"download_button_{st.session_state.langues}_{st.session_state.timestamp}"
|
| 569 |
-
)
|
| 570 |
-
|
| 571 |
-
except Exception as e:
|
| 572 |
-
st.error(f"Une erreur s'est produite : {str(e)}")
|
| 573 |
-
finally:
|
| 574 |
-
# Rรฉactiver l'interface
|
| 575 |
-
st.session_state.ui_chat_input_disabled = False
|
| 576 |
-
st.session_state.ui_audio_input_disabled = False
|
| 577 |
-
st.session_state.ui_filesuploader_disabled = False
|
| 578 |
-
|
| 579 |
-
with chat_input_tabs3:
|
| 580 |
-
with st.container(border=True):
|
| 581 |
-
# Interface utilisateur pour l'upload de fichiers
|
| 582 |
-
st.session_state.uploaded_files = st.file_uploader(
|
| 583 |
-
"Choose files to upload",
|
| 584 |
-
accept_multiple_files=True,
|
| 585 |
-
type=CHAT_FILES_UPLOAD_ALLOWED_TYPES,
|
| 586 |
-
key="chat_files_upload",
|
| 587 |
-
disabled=st.session_state.ui_filesuploader_disabled
|
| 588 |
-
)
|
| 589 |
-
|
| 590 |
-
with chat_input_tabs2:
|
| 591 |
-
with st.container(border=True):
|
| 592 |
-
# Interface utilisateur pour l'enregistrement audio
|
| 593 |
-
st.audio_input(
|
| 594 |
-
"Record a voice message",
|
| 595 |
-
on_change=recorder_released,
|
| 596 |
-
key="rec_widget",
|
| 597 |
-
disabled=st.session_state.ui_audio_input_disabled
|
| 598 |
-
)
|
| 599 |
-
|
| 600 |
-
|
| 601 |
-
with st.container(border=True):
|
| 602 |
-
if st.session_state.user_input:
|
| 603 |
-
# Traitement du message texte
|
| 604 |
-
user_message_moderation_check(st.session_state.user_input)
|
| 605 |
-
|
| 606 |
-
# Rรฉinitialiser l'รฉtat prรฉcรฉdent
|
| 607 |
-
st.session_state.full_response = ""
|
| 608 |
-
st.session_state.audio_list = []
|
| 609 |
-
|
| 610 |
-
with st.chat_message("user", avatar="๐ค"):
|
| 611 |
-
timestamp = datetime.now().strftime("%Y-%m-%d %H:%M:%S")
|
| 612 |
-
st.write(timestamp)
|
| 613 |
-
with st.container(border=True):
|
| 614 |
-
st.subheader(f"๐ lang: {st.session_state.interface_language}")
|
| 615 |
-
st.markdown(st.session_state.user_input)
|
| 616 |
-
|
| 617 |
-
for lang in st.session_state.selected_languages:
|
| 618 |
-
st.session_state.target_language = lang["iso-639-1"]
|
| 619 |
-
st.session_state.system_prompt, st.session_state.operation_prompt = init_langs_for_processing(
|
| 620 |
-
target_language=st.session_state.target_language,
|
| 621 |
-
interface_language=st.session_state.interface_language,
|
| 622 |
-
language_detected=st.session_state.interface_language
|
| 623 |
-
)
|
| 624 |
-
with st.chat_message("assistant", avatar="๐ป"):
|
| 625 |
-
message_placeholder = st.empty()
|
| 626 |
-
st.session_state.response_generator = process_message(
|
| 627 |
-
st.session_state.user_input,
|
| 628 |
-
st.session_state.operation_prompt,
|
| 629 |
-
st.session_state.system_prompt
|
| 630 |
-
)
|
| 631 |
-
for response_chunk in st.session_state.response_generator:
|
| 632 |
-
message_placeholder.markdown(response_chunk)
|
| 633 |
-
st.session_state.response_generator.close()
|
| 634 |
-
|
| 635 |
-
if st.session_state.uploaded_files:
|
| 636 |
-
# Traitement des fichiers tรฉlรฉchargรฉs
|
| 637 |
-
for uploaded_file in st.session_state.uploaded_files:
|
| 638 |
-
file_path = save_attachment(uploaded_file)
|
| 639 |
-
with st.chat_message("user", avatar="๐ค"):
|
| 640 |
-
timestamp = datetime.now().strftime("%Y-%m-%d %H:%M:%S")
|
| 641 |
-
st.write(timestamp)
|
| 642 |
-
st.subheader(f"Fichier tรฉlรฉchargรฉ: {uploaded_file.name}")
|
| 643 |
-
if uploaded_file.type.startswith("text"):
|
| 644 |
-
uploaded_file.seek(0)
|
| 645 |
-
content = uploaded_file.read().decode("utf-8")
|
| 646 |
-
with st.chat_message("user", avatar="๐ค"):
|
| 647 |
-
st.markdown(content)
|
| 648 |
-
for lang in st.session_state.selected_languages:
|
| 649 |
-
st.session_state.target_language = lang["iso-639-1"]
|
| 650 |
-
st.session_state.system_prompt, st.session_state.operation_prompt = init_langs_for_processing(
|
| 651 |
-
target_language=st.session_state.target_language,
|
| 652 |
-
interface_language=st.session_state.interface_language,
|
| 653 |
-
language_detected=st.session_state.interface_language
|
| 654 |
-
)
|
| 655 |
-
with st.chat_message("assistant", avatar="๐ป"):
|
| 656 |
-
message_placeholder = st.empty()
|
| 657 |
-
st.session_state.response_generator = process_message(
|
| 658 |
-
content,
|
| 659 |
-
st.session_state.operation_prompt,
|
| 660 |
-
st.session_state.system_prompt
|
| 661 |
-
)
|
| 662 |
-
for response_chunk in st.session_state.response_generator:
|
| 663 |
-
message_placeholder.markdown(response_chunk)
|
| 664 |
-
st.session_state.response_generator.close()
|
| 665 |
-
else:
|
| 666 |
-
with st.chat_message("assistant", avatar="๐ป"):
|
| 667 |
-
st.markdown("Type de fichier non supportรฉ pour le traitement. Seul le texte est supportรฉ.")
|
| 668 |
-
|
| 669 |
-
main_page()
|
| 670 |
-
|
| 671 |
-
|
| 672 |
-
|
| 673 |
-
|
| 674 |
-
|
| 675 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
requirements.txt
CHANGED
|
@@ -1,5 +1,6 @@
|
|
| 1 |
-
streamlit
|
|
|
|
|
|
|
|
|
|
| 2 |
openai
|
| 3 |
-
pydub
|
| 4 |
-
python-dotenv
|
| 5 |
-
elevenlabs
|
|
|
|
| 1 |
+
streamlit
|
| 2 |
+
streamlit-audiorecorder
|
| 3 |
+
streamlit-autorefresh
|
| 4 |
+
replicate
|
| 5 |
openai
|
| 6 |
+
pydub
|
|
|
|
|
|
scripts/gen_ui_lang_support.py
CHANGED
|
@@ -56,29 +56,7 @@ traductions_ui_dict = {
|
|
| 56 |
"chargement": "Chargement en cours...",
|
| 57 |
"aucun_message": "Aucun message pour le moment. Commencez la conversation !",
|
| 58 |
"telecharger_audio": "Tรฉlรฉcharger l'audio",
|
| 59 |
-
"lecture_auto_tts": "Lecture audio automatique de la synthese vocale"
|
| 60 |
-
"traitement_en_cours": "Traitement en cours...",
|
| 61 |
-
"traitement_termine": "Traitement terminรฉ !",
|
| 62 |
-
"traduction_en_cours": "Traduction en cours...",
|
| 63 |
-
"erreur_traduction": "Une erreur s'est produite lors de la traduction de votre message : {}",
|
| 64 |
-
"traduction_terminee": "Traduction terminรฉe !",
|
| 65 |
-
"synthese_vocale_en_cours": "Synthรจse vocale en cours...",
|
| 66 |
-
"erreur_synthese_vocale": "Une erreur s'est produite lors de la synthรจse vocale : {}",
|
| 67 |
-
"synthese_vocale_terminee": "Synthรจse vocale terminรฉe !",
|
| 68 |
-
"concatenation_audio_en_cours": "Concatรฉnation audio en cours...",
|
| 69 |
-
"erreur_concatenation_audio": "Une erreur s'est produite lors de la concatรฉnation audio : {}",
|
| 70 |
-
"concatenation_audio_terminee": "Concatรฉnation audio terminรฉe !",
|
| 71 |
-
"transcription_audio_en_cours": "Transcription audio en cours...",
|
| 72 |
-
"erreur_transcription_audio": "Une erreur s'est produite lors de la transcription audio : {}",
|
| 73 |
-
"transcription_audio_terminee": "Transcription audio terminรฉe !",
|
| 74 |
-
"detection_langue_en_cours": "Dรฉtection de la langue en cours...",
|
| 75 |
-
"erreur_detection_langue": "Une erreur s'est produite lors de la dรฉtection de la langue : {}",
|
| 76 |
-
"detection_langue_terminee": "Dรฉtection de la langue terminรฉe !",
|
| 77 |
-
"transcription_audio": "Transcription audio : {}",
|
| 78 |
-
"confirmer" : "Confirmer",
|
| 79 |
-
"Annuler" : "Annuler",
|
| 80 |
-
"selection_de_la_langue": "Sรฉlection de la langue",
|
| 81 |
-
"choix_selection_langue": "Choisissez la langue de l'interface"
|
| 82 |
}
|
| 83 |
}
|
| 84 |
|
|
|
|
| 56 |
"chargement": "Chargement en cours...",
|
| 57 |
"aucun_message": "Aucun message pour le moment. Commencez la conversation !",
|
| 58 |
"telecharger_audio": "Tรฉlรฉcharger l'audio",
|
| 59 |
+
"lecture_auto_tts": "Lecture audio automatique de la synthese vocale"
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 60 |
}
|
| 61 |
}
|
| 62 |
|
scripts/list_and_delete_assistants.py
DELETED
|
@@ -1,31 +0,0 @@
|
|
| 1 |
-
#!/usr/bin/env python3
|
| 2 |
-
#coding: utf-8
|
| 3 |
-
import os
|
| 4 |
-
|
| 5 |
-
def list_and_delete_fine_tuned_models():
|
| 6 |
-
import openai
|
| 7 |
-
from openai import OpenAI
|
| 8 |
-
# Assurez-vous que votre clรฉ API est dรฉfinie dans les variables d'environnement
|
| 9 |
-
api_key = os.getenv("OPENAI_API_KEY")
|
| 10 |
-
client = OpenAI(api_key=api_key)
|
| 11 |
-
|
| 12 |
-
while True:
|
| 13 |
-
# Lister tous les assistants
|
| 14 |
-
assistants_list = client.beta.assistants.list(
|
| 15 |
-
order="desc",
|
| 16 |
-
limit="20",
|
| 17 |
-
)
|
| 18 |
-
|
| 19 |
-
if len(assistants_list.data) < 1:
|
| 20 |
-
break
|
| 21 |
-
|
| 22 |
-
for assistant in assistants_list.data:
|
| 23 |
-
print(f"Suppression de : {assistant.id} ({assistant.name}) ...", end="")
|
| 24 |
-
response = client.beta.assistants.delete(assistant.id)
|
| 25 |
-
if response.deleted:
|
| 26 |
-
print("OK")
|
| 27 |
-
else:
|
| 28 |
-
print("FAILED")
|
| 29 |
-
|
| 30 |
-
if __name__ == '__main__':
|
| 31 |
-
list_and_delete_fine_tuned_models()
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
scripts/ui_lang_updater.py
DELETED
|
@@ -1,177 +0,0 @@
|
|
| 1 |
-
#!/usr/bin/env python3
|
| 2 |
-
#coding:utf-8
|
| 3 |
-
|
| 4 |
-
from openai import OpenAI
|
| 5 |
-
from os import getenv
|
| 6 |
-
from typing import Optional
|
| 7 |
-
from typing import Union
|
| 8 |
-
from typing import Any
|
| 9 |
-
import re
|
| 10 |
-
import json
|
| 11 |
-
import datetime
|
| 12 |
-
|
| 13 |
-
|
| 14 |
-
SUPPORTED_LANGUAGES=[
|
| 15 |
-
"Afrikaans", "Arabic", "Armenian", "Azerbaijani", "Belarusian", "Bosnian", "Bulgarian", "Catalan", "Chinese", "Croatian", "Czech", "Danish", "Dutch", "English", "Estonian", "Finnish", "French", "Galician", "German", "Greek", "Hebrew", "Hindi", "Hungarian", "Icelandic", "Indonesian", "Italian", "Japanese", "Kannada", "Kazakh", "Korean", "Latvian", "Lithuanian", "Macedonian", "Malay", "Marathi", "Maori", "Nepali", "Norwegian", "Persian", "Polish", "Portuguese", "Romanian", "Russian", "Serbian", "Slovak", "Slovenian", "Spanish", "Swahili", "Swedish", "Tagalog", "Tamil", "Thai", "Turkish", "Ukrainian", "Urdu", "Vietnamese", "Welsh"
|
| 16 |
-
]
|
| 17 |
-
|
| 18 |
-
SUPPORTED_LANGUAGES_FR_ = [
|
| 19 |
-
"Afrikaans", "Arabe", "Armรฉnien", "Azรฉri", "Biรฉlorusse", "Bosniaque", "Bulgare", "Catalan", "Chinois", "Croate", "Tchรจque", "Danois", "Nรฉerlandais", "Anglais", "Estonien", "Finnois", "Franรงais", "Galicien", "Allemand", "Grec", "Hรฉbreu", "Hindi", "Hongrois", "Islandais", "Indonรฉsien", "Italien", "Japonais", "Kannada", "Kazakh", "Corรฉen", "Letton", "Lituanien", "Macรฉdonien", "Malais", "Marathi", "Maori", "Nรฉpalais", "Norvรฉgien", "Persan", "Polonais", "Portugais", "Roumain", "Russe", "Serbe", "Slovaque", "Slovรจne", "Espagnol", "Swahili", "Suรฉdois", "Tagalog", "Tamoul", "Thaรฏ", "Turc", "Ukrainien", "Ourdou", "Vietnamien", "Gallois"
|
| 20 |
-
]
|
| 21 |
-
|
| 22 |
-
|
| 23 |
-
|
| 24 |
-
|
| 25 |
-
def read_json_file(file_path: str) -> dict:
|
| 26 |
-
with open(file_path, "r", encoding='utf-8') as json_file:
|
| 27 |
-
return json.load(json_file)
|
| 28 |
-
|
| 29 |
-
def write_json_file(file_path: str, data: Union[str, bytes, dict]) -> None:
|
| 30 |
-
try:
|
| 31 |
-
if type(data) == str:
|
| 32 |
-
data = data.encode("utf-8")
|
| 33 |
-
if type(data) == dict:
|
| 34 |
-
data = json.dumps(data, indent=4, ensure_ascii=False, sort_keys=False).encode("utf-8")
|
| 35 |
-
with open(file_path, "wb") as json_file:
|
| 36 |
-
json_file.write(data)
|
| 37 |
-
json_file.close()
|
| 38 |
-
except Exception as e:
|
| 39 |
-
raise e
|
| 40 |
-
|
| 41 |
-
def get_prompt_translation_ui_text(base_lang: str,
|
| 42 |
-
target_lang: str) -> str:
|
| 43 |
-
prompt_ = "\n".join([
|
| 44 |
-
f"Je souhaite que vous agissiez en tant que traducteur linguistique, correcteur d'orthographe et amรฉliorateur.",
|
| 45 |
-
f"Je vous donne un texte en {base_lang} et je vous demande de le traduire en {target_lang}. Puis rรฉpondez dans une version corrigรฉe et amรฉliorรฉe de mon texte, dans la langue de destination.",
|
| 46 |
-
f"Je souhaite que vous remplaciez mes mots et phrases par des termes et expressions les plus appropriรฉs dans la langue de destination. Conservez le mรชme sens, mais corrigez les fautes.",
|
| 47 |
-
f"Je vous demande de ne rรฉpondre que par les corrections et amรฉliorations, sans ajouter d'explications.",
|
| 48 |
-
f"Lorsque je soumets un texte ร traiter, je procรฉderai de la maniรจre suivante pour dรฉlimiter le dรฉbut et la fin du ou des blocs de texte que tu devras traiter : j'utiliserai trois caractรจres de guillemets doubles pour dรฉlimiter le dรฉbut d'un bloc de texte ร traiter ; j'utiliserai trois caractรจres de guillemets doubles pour dรฉlimiter la fin d'un bloc de texte ร traiter. Le texte sera compris entre ces dรฉlimitations, que ce soit par format ou par taille du texte dans un bloc ร traiter. Si mon texte est sur une seule ligne, il est possible que je place รฉgalement les dรฉlimiteurs de dรฉbut et de fin sur la mรชme ligne.",
|
| 49 |
-
f"Il est impรฉratif que tu respectes le format d'origine au mieux ; donc, ร moins qu'il soit absolument nรฉcessaire de corriger le formatage, respecte au mieux ce dรฉtail. (Par exemple, tout en une ligne ou sur plusieurs... tu respectes le formatage d'origine. Par contre, tu peux, par exemple, corriger les espaces, tabulations et autres รฉlรฉments superflus.)"
|
| 50 |
-
])
|
| 51 |
-
return prompt_
|
| 52 |
-
|
| 53 |
-
def get_openai_connected_client():
|
| 54 |
-
openai_client = OpenAI(api_key=getenv("OPENAI_API_KEY"))
|
| 55 |
-
return openai_client
|
| 56 |
-
|
| 57 |
-
def create_chat_completion(client,
|
| 58 |
-
system_prompt,
|
| 59 |
-
user_input,
|
| 60 |
-
model="gpt-4o-mini",
|
| 61 |
-
temperature=0.01,):
|
| 62 |
-
response = client.chat.completions.create(
|
| 63 |
-
model=f"{model}",
|
| 64 |
-
temperature=temperature,
|
| 65 |
-
messages=[
|
| 66 |
-
{
|
| 67 |
-
"role": "system",
|
| 68 |
-
"content": f"{system_prompt}"
|
| 69 |
-
},
|
| 70 |
-
{
|
| 71 |
-
"role": "user",
|
| 72 |
-
"content": f"{user_input}"
|
| 73 |
-
}
|
| 74 |
-
]
|
| 75 |
-
)
|
| 76 |
-
return response.choices[0].message.content
|
| 77 |
-
|
| 78 |
-
def remove_response_separator(response_text:str) -> str:
|
| 79 |
-
# Utiliser un regex pour retirer les trois premiรจres et derniรจres doubles quotes
|
| 80 |
-
return re.sub(r'^"{3}|"{3}$', '', response_text.strip())
|
| 81 |
-
|
| 82 |
-
|
| 83 |
-
def translate_ui_template_string(file_path: str,
|
| 84 |
-
base_ui_lang: str,
|
| 85 |
-
target_ui_lang: str) -> str:
|
| 86 |
-
openai_client = get_openai_connected_client()
|
| 87 |
-
PROMPT_ = get_prompt_translation_ui_text(base_ui_lang, target_ui_lang)
|
| 88 |
-
ui_lang_dict = read_json_file(file_path)
|
| 89 |
-
traductions_ui_dict[target_ui_lang] = {}
|
| 90 |
-
for key, text_ui in ui_lang_dict[base_ui_lang].items():
|
| 91 |
-
operation_prompt = f"Translate(\'{base_ui_lang}\' to \'{target_ui_lang}\')"
|
| 92 |
-
traduction_text_ui = remove_response_separator(
|
| 93 |
-
create_chat_completion(
|
| 94 |
-
openai_client,
|
| 95 |
-
PROMPT_,
|
| 96 |
-
f'{operation_prompt} :\n\"\"\"\n{text_ui}\n\"\"\"'
|
| 97 |
-
)
|
| 98 |
-
).strip()
|
| 99 |
-
traductions_ui_dict[target_ui_lang][key] = traduction_text_ui
|
| 100 |
-
return json.dumps(traductions_ui_dict, indent=4, ensure_ascii=False, sort_keys=False)
|
| 101 |
-
|
| 102 |
-
|
| 103 |
-
def generate_ui_lang_support(
|
| 104 |
-
base_ui_lang: Optional[str] = "French",
|
| 105 |
-
target_ui_lang: Optional[Union[str, list]] = SUPPORTED_LANGUAGES,
|
| 106 |
-
ui_lang_support_filepath: Optional[str] = "ui_lang_support.json",
|
| 107 |
-
skip_translated: Optional[bool] = True) -> str:
|
| 108 |
-
|
| 109 |
-
traductions_ui_dict = read_json_file(ui_lang_support_filepath)
|
| 110 |
-
|
| 111 |
-
if list == type(target_ui_lang):
|
| 112 |
-
if base_ui_lang in target_ui_lang:
|
| 113 |
-
target_ui_lang.remove(base_ui_lang)
|
| 114 |
-
elif str == type(target_ui_lang):
|
| 115 |
-
if base_ui_lang == target_ui_lang:
|
| 116 |
-
raise ValueError(f"La langue de l'interface {base_ui_lang} est la mรชme que la langue de destination {target_ui_lang}. Veuillez choisir une autre langue de destination.")
|
| 117 |
-
|
| 118 |
-
openai_client = get_openai_connected_client()
|
| 119 |
-
|
| 120 |
-
cnt=0
|
| 121 |
-
for to_lang in target_ui_lang:
|
| 122 |
-
cnt += 1
|
| 123 |
-
print(f"\033[93m[{cnt}/{len(target_ui_lang)}]\033[0m \033[92mTranslating UI text to \033[0m\033[96m{to_lang}...\033[0m")
|
| 124 |
-
PROMPT_ = get_prompt_translation_ui_text(base_ui_lang, to_lang)
|
| 125 |
-
if f"{to_lang}" not in traductions_ui_dict.keys():
|
| 126 |
-
traductions_ui_dict[to_lang] = {}
|
| 127 |
-
it_cnt=0
|
| 128 |
-
for key, text_ui in traductions_ui_dict[base_ui_lang].items():
|
| 129 |
-
it_cnt += 1
|
| 130 |
-
print(f" \033[93m[{it_cnt}/{len(traductions_ui_dict[base_ui_lang].keys())}]\033[0m \033[92mTranslating UI text value of key \033[0m\033[96m'{key}'...\033[0m")
|
| 131 |
-
if (f"{key}" not in traductions_ui_dict[to_lang].keys() ) or (not skip_translated):
|
| 132 |
-
operation_prompt = f"Translate(\'{base_ui_lang}\' to \'{to_lang}\')"
|
| 133 |
-
traduction_text_ui = remove_response_separator(
|
| 134 |
-
create_chat_completion(
|
| 135 |
-
openai_client,
|
| 136 |
-
PROMPT_,
|
| 137 |
-
f'{operation_prompt} :\n\"\"\"\n{text_ui}\n\"\"\"'
|
| 138 |
-
)
|
| 139 |
-
).strip()
|
| 140 |
-
traductions_ui_dict[to_lang][key] = traduction_text_ui
|
| 141 |
-
|
| 142 |
-
return json.dumps(traductions_ui_dict, indent=4, ensure_ascii=False, sort_keys=False)
|
| 143 |
-
|
| 144 |
-
def init_template_base_lang(file_path: str,
                            base_ui_lang_dict: dict) -> None:
    """Write the base-language UI strings to ``/tmp/<file_path>.json``.

    The mapping is serialized as 4-space-indented JSON, keeping non-ASCII
    characters as-is (``ensure_ascii=False``) and the mapping's own key
    order (``sort_keys=False``), then written as UTF-8 bytes.

    Args:
        file_path: File name stem; the data lands in ``/tmp/{file_path}.json``.
        base_ui_lang_dict: JSON-serializable mapping to write.
    """
    payload = json.dumps(
        base_ui_lang_dict, indent=4, ensure_ascii=False, sort_keys=False
    ).encode("utf-8")
    # Binary mode keeps the bytes exactly as encoded (no newline translation).
    # The ``with`` block closes the file on exit, so no explicit close() call.
    with open(f"/tmp/{file_path}.json", "wb") as json_file:
        json_file.write(payload)
|
| 150 |
-
|
| 151 |
-
def read_template_base_lang(file_path: str) -> str:
    """Return the text stored in ``/tmp/<file_path>.json``.

    The file is read as raw bytes and decoded as UTF-8; the JSON is not
    parsed.

    Args:
        file_path: File name stem; the data is read from
            ``/tmp/{file_path}.json``.

    Returns:
        The decoded file content.
    """
    template_path = f"/tmp/{file_path}.json"
    with open(template_path, "rb") as handle:
        return handle.read().decode("utf-8")
|
| 156 |
-
|
| 157 |
-
|
| 158 |
-
if __name__ == "__main__":
    # Translate the French base UI strings into every supported language.
    JSON_UI_LANG_SUPPORT = generate_ui_lang_support(
        base_ui_lang="French",
        target_ui_lang=SUPPORTED_LANGUAGES
    )

    # The output file is written in binary mode, so encode text as UTF-8.
    if isinstance(JSON_UI_LANG_SUPPORT, str):
        JSON_UI_LANG_SUPPORT = JSON_UI_LANG_SUPPORT.encode("utf-8")

    # Timestamp format: "MM-DD-YYYY_HH-MM-SS"
    timestamp = datetime.datetime.now().strftime("%m-%d-%Y_%H-%M-%S")
    # The ``with`` block closes the file on exit; no explicit close() needed.
    with open(f"ui_lang_support_{timestamp}.json", "wb") as output_file:
        output_file.write(JSON_UI_LANG_SUPPORT)

    # Print the success message with a green foreground (ANSI escape codes).
    print("\033[92mSuccess and Done !\033[0m")
    # ``exit`` is injected by the ``site`` module and may be missing (e.g.
    # ``python -S``); raising SystemExit gives the same exit status 0.
    raise SystemExit(0)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
ui_lang_support.json
CHANGED
|
The diff for this file is too large to render.
See raw diff
|
|
|
var_app.py
DELETED
|
@@ -1,57 +0,0 @@
|
|
| 1 |
-
# Version string of this module.
__version__ = "1.3.10"
|
| 2 |
-
|
| 3 |
-
from core.files import read_file
|
| 4 |
-
|
| 5 |
-
# File extensions accepted by the chat file uploader.
CHAT_FILES_UPLOAD_ALLOWED_TYPES = [
    "txt",
    "wav", "mp3"
]

# List of languages supported by the application
SUPPORTED_LANGUAGES = [
    "Afrikaans", "Arabic", "Armenian", "Azerbaijani", "Belarusian", "Bosnian",
    "Bulgarian", "Catalan", "Chinese", "Croatian", "Czech", "Danish", "Dutch",
    "English", "Estonian", "Finnish", "French", "Galician", "German", "Greek",
    "Hebrew", "Hindi", "Hungarian", "Icelandic", "Indonesian", "Italian",
    "Japanese", "Kannada", "Kazakh", "Korean", "Latvian", "Lithuanian",
    "Macedonian", "Malay", "Marathi", "Maori", "Nepali", "Norwegian", "Persian",
    "Polish", "Portuguese", "Romanian", "Russian", "Serbian", "Slovak",
    "Slovenian", "Spanish", "Swahili", "Swedish", "Tagalog", "Tamil", "Thai",
    "Turkish", "Ukrainian", "Urdu", "Vietnamese", "Welsh"
]


def _flag_emoji(country_code: str) -> str:
    """Return the flag emoji for a two-letter ISO 3166-1 alpha-2 code.

    A flag emoji is the pair of Unicode regional indicator symbols
    (U+1F1E6 for "A" through U+1F1FF for "Z") matching the code's letters.
    Building the flags from ASCII codes keeps this file immune to the
    encoding corruption that had garbled the literal emoji.
    """
    return "".join(
        chr(0x1F1E6 + ord(letter) - ord("A")) for letter in country_code.upper()
    )


# Flag of Wales: black flag (U+1F3F4) + tag characters "gbwls" + cancel tag.
_WALES_FLAG = "\U0001F3F4\U000E0067\U000E0062\U000E0077\U000E006C\U000E0073\U000E007F"

# Flag emoji shown next to each supported language.
LANGUAGES_EMOJI = {
    "Afrikaans": _flag_emoji("ZA"), "Arabic": _flag_emoji("SA"),
    "Armenian": _flag_emoji("AM"), "Azerbaijani": _flag_emoji("AZ"),
    "Belarusian": _flag_emoji("BY"), "Bosnian": _flag_emoji("BA"),
    "Bulgarian": _flag_emoji("BG"), "Catalan": _flag_emoji("ES"),
    "Chinese": _flag_emoji("CN"), "Croatian": _flag_emoji("HR"),
    "Czech": _flag_emoji("CZ"), "Danish": _flag_emoji("DK"),
    "Dutch": _flag_emoji("NL"), "English": _flag_emoji("GB"),
    "Estonian": _flag_emoji("EE"), "Finnish": _flag_emoji("FI"),
    "French": _flag_emoji("FR"), "Galician": _flag_emoji("ES"),
    "German": _flag_emoji("DE"), "Greek": _flag_emoji("GR"),
    "Hebrew": _flag_emoji("IL"), "Hindi": _flag_emoji("IN"),
    "Hungarian": _flag_emoji("HU"), "Icelandic": _flag_emoji("IS"),
    "Indonesian": _flag_emoji("ID"), "Italian": _flag_emoji("IT"),
    "Japanese": _flag_emoji("JP"), "Kannada": _flag_emoji("IN"),
    "Kazakh": _flag_emoji("KZ"), "Korean": _flag_emoji("KR"),
    "Latvian": _flag_emoji("LV"), "Lithuanian": _flag_emoji("LT"),
    "Macedonian": _flag_emoji("MK"), "Malay": _flag_emoji("MY"),
    "Marathi": _flag_emoji("IN"), "Maori": _flag_emoji("NZ"),
    "Nepali": _flag_emoji("NP"), "Norwegian": _flag_emoji("NO"),
    "Persian": _flag_emoji("IR"), "Polish": _flag_emoji("PL"),
    "Portuguese": _flag_emoji("PT"), "Romanian": _flag_emoji("RO"),
    "Russian": _flag_emoji("RU"), "Serbian": _flag_emoji("RS"),
    "Slovak": _flag_emoji("SK"), "Slovenian": _flag_emoji("SI"),
    "Spanish": _flag_emoji("ES"), "Swahili": _flag_emoji("KE"),
    "Swedish": _flag_emoji("SE"), "Tagalog": _flag_emoji("PH"),
    "Tamil": _flag_emoji("IN"), "Thai": _flag_emoji("TH"),
    "Turkish": _flag_emoji("TR"), "Ukrainian": _flag_emoji("UA"),
    "Urdu": _flag_emoji("PK"), "Vietnamese": _flag_emoji("VN"),
    "Welsh": _WALES_FLAG,
}
|
| 37 |
-
|
| 38 |
-
class GlobalSystemPrompts:
    """Namespace for the application's global system prompts."""

    @staticmethod
    def linguascribe():
        """Retrieve the system prompt for the Linguascribe feature.

        The prompt is loaded by calling ``read_file('linguascribe.prompt')``.

        Returns:
            str: The system prompt for Linguascribe, or an empty string when
            the file is missing or cannot be read (a message is printed in
            both cases).
        """
        try:
            return read_file('linguascribe.prompt')
        except FileNotFoundError:
            # Message text repaired: the accented characters had been
            # mis-decoded ("été trouvé").
            print("Le fichier 'linguascribe.prompt' n'a pas été trouvé.")
            return ""
        except IOError as e:
            print(f"Erreur lors de la lecture du fichier 'linguascribe.prompt': {e}")
            return ""
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|