CrazyMonkey0 committed on
Commit
7eb3110
·
1 Parent(s): ecab563

Initial APP

Browse files
Dockerfile ADDED
@@ -0,0 +1,14 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
# Base image: CPython 3.9 (Debian-based official image).
FROM python:3.9

# Run as an unprivileged user; uid 1000 matches the typical host user.
RUN useradd -m -u 1000 user
USER user
# Make user-level pip installs (~/.local/bin) resolvable.
ENV PATH="/home/user/.local/bin:$PATH"

WORKDIR /app

# Install dependencies first so this layer is cached until requirements change.
COPY --chown=user ./requirements.txt requirements.txt
RUN pip install --no-cache-dir --upgrade -r requirements.txt

# Copy the application source.
COPY --chown=user . /app

# Serve the FastAPI app with gunicorn + uvicorn workers on port 7860.
CMD ["gunicorn", "main:app", "-k", "uvicorn.workers.UvicornWorker", "--bind", "0.0.0.0:7860", "--workers", "2"]
app/__init__.py ADDED
File without changes
app/__pycache__/__init__.cpython-312.pyc ADDED
Binary file (151 Bytes). View file
 
app/__pycache__/main.cpython-312.pyc ADDED
Binary file (2.19 kB). View file
 
app/main.py ADDED
@@ -0,0 +1,32 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
from fastapi import FastAPI
from app.routes.nlp import load_model_nlp, router as nlp_router
from app.routes.tts import load_model_tts
from app.routes.asr import load_model_asr, router as asr_router
from app.routes.translation import load_model_translation, router as trans_router
import os

# Initialize application
app = FastAPI(debug=False)

# Directory for generated/uploaded audio.  The ASR route reads
# request.app.state.AUDIO_DIR (and writes into its "temp" subfolder), and the
# TTS helper writes WAV files into it — previously this attribute was never
# set, so /asr and /chat raised AttributeError at request time.
app.state.AUDIO_DIR = os.environ.get(
    "AUDIO_DIR", os.path.join(os.path.dirname(os.path.abspath(__file__)), "audio")
)
# Ensure both the audio dir and the temp upload dir exist before serving.
os.makedirs(os.path.join(app.state.AUDIO_DIR, "temp"), exist_ok=True)

# Load the pre-trained NLP (chat) model and tokenizer once at startup
app.state.model_nlp, app.state.tokenizer_nlp = load_model_nlp()

# Load the pre-trained translation model and tokenizer
app.state.model_trans, app.state.tokenizer_trans = load_model_translation()

# Load the pre-trained TTS pipeline
app.state.model_tts = load_model_tts()

# Load the pre-trained ASR processor and model
app.state.processor_asr, app.state.model_asr = load_model_asr()

# Include the NLP router
app.include_router(nlp_router)
# Include the translation router
app.include_router(trans_router)
# Include the ASR router
app.include_router(asr_router)

@app.get("/")
def root():
    """Health/welcome endpoint."""
    return {"message": "Welcome to the English Learning API"}
app/routes/__init__.py ADDED
File without changes
app/routes/__pycache__/__init__.cpython-312.pyc ADDED
Binary file (158 Bytes). View file
 
app/routes/__pycache__/asr.cpython-312.pyc ADDED
Binary file (2.19 kB). View file
 
app/routes/__pycache__/nlp.cpython-312.pyc ADDED
Binary file (5.58 kB). View file
 
app/routes/__pycache__/translation.cpython-312.pyc ADDED
Binary file (1.81 kB). View file
 
app/routes/__pycache__/tts.cpython-312.pyc ADDED
Binary file (1.54 kB). View file
 
app/routes/asr.py ADDED
@@ -0,0 +1,36 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
from transformers import WhisperForConditionalGeneration, WhisperProcessor
from fastapi import APIRouter, Request, UploadFile, File
import librosa
import os

router = APIRouter()

def load_model_asr():
    """Load the pre-trained Whisper (English-only, small) processor and model.

    Called once at application startup; the results are stored on app.state.
    """
    processor = WhisperProcessor.from_pretrained("openai/whisper-small.en")
    model = WhisperForConditionalGeneration.from_pretrained("openai/whisper-small.en")
    return processor, model

@router.post("/asr")
async def asr(request: Request, audio: UploadFile = File(...)):
    """Transcribe an uploaded audio file to English text.

    The upload is written to a temp file (librosa loads from a path),
    resampled to 16 kHz, run through Whisper, and the temp file is removed
    afterwards.  Returns {"transcription": <str>}.
    """
    # Get the loaded ASR model and processor
    processor, model = request.app.state.processor_asr, request.app.state.model_asr

    # Use only the base name of the client-supplied filename so a crafted
    # name like "../../x" cannot escape the temp directory (untrusted input).
    safe_name = os.path.basename(audio.filename or "upload.wav")
    audio_path = os.path.join(request.app.state.AUDIO_DIR, "temp", safe_name)
    with open(audio_path, "wb") as f:
        f.write(await audio.read())

    try:
        # Whisper expects 16 kHz input; librosa resamples on load.
        audio_data, sampling_rate = librosa.load(audio_path, sr=16000)

        # Preparing input features for the model
        inputs = processor(audio_data, return_tensors="pt", sampling_rate=sampling_rate)
        input_features = inputs["input_features"]

        # Generating token IDs
        output = model.generate(input_features)

        # Decoding tokens into text
        transcription = processor.batch_decode(output, skip_special_tokens=True)
    finally:
        # The temp file was previously never deleted, leaking disk space on
        # every request; best-effort cleanup.
        try:
            os.remove(audio_path)
        except OSError:
            pass

    return {"transcription": transcription[0]}
app/routes/nlp.py ADDED
@@ -0,0 +1,126 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
from transformers import AutoModelForCausalLM, AutoTokenizer
from pydantic import BaseModel
from fastapi import APIRouter, Request
from .tts import save_audio


# Model name for the NLP (chat) model.
model_name = "Qwen/Qwen2.5-1.5B-Instruct"
router = APIRouter()

# The system prompt is identical for every request, so define it once at
# import time instead of rebuilding the large string inside the handler.
SYSTEM_PROMPT = """
You are Emma — a friendly, patient, encouraging native speaker of American English and an experienced English teacher. Assume every user is learning English.

Top priorities (in order):

First: Reply NATURALLY and CONVERSATIONALLY to the user’s most recent (last) message. The reply should sound like a warm, helpful human: concise (2–4 sentences), encouraging, and easy to understand.

Second: Immediately after that natural reply, analyze only that same most recent message for language errors and apply the correction rules below. Do not analyze earlier messages.

What to detect (error categories):

Grammar (tenses, word order, auxiliary duplication like “what’s is”, subject-verb agreement)

Vocabulary (word choice, false friends, awkward collocations)

Spelling

Punctuation

Register (formal vs. informal mismatch)

Typical learner errors (missing articles, capitalization mistakes, double auxiliaries, common typos)

Correction rules:

If any errors are found, append exactly one correction block at the end of your reply. If no errors are found, append nothing.

Corrections must be concise, clear, encouraging, and not overwhelming.

Explanations must be one sentence and simple.

Provide an example only if helpful, and keep it short (one sentence).

If multiple possible fixes exist, show the single most natural and simple correction for the learner (you may include a second only if it’s essential).

Exact correction block format (use this format verbatim):

CORRECTION:

Error: [short label — e.g. “Grammar” / “Spelling” / “Vocabulary”]

Original: “...original text fragment...”

Correction: “...suggested correction...”

Explanation: [one-sentence, simple explanation]
(If helpful) Example: “...full correct sentence...”

Behavior & style constraints:

Always prioritize the conversational reply above the correction. The correction is an add-on, never the primary content.

Tone: friendly, supportive, patient, non-judgmental.

Keep everything short, organized, and easy to scan.

Never invent facts. If you don’t know something, say “I don’t know” or ask a clarifying question.

Assume the user is an English learner and tailor explanations accordingly.

No long grammar essays; keep corrections short and actionable.

Execution notes for the model (internal-use guidance you should follow):

Analyze only the last user message text (no earlier context).

If the last message contains more than one error, include up to two prioritized corrections inside the single correction block (choose the two most important).

Use natural, learner-friendly wording in explanations.

Keep the correction block compact and visually distinct from the conversational reply.

Use your prompt-optimization and code-writing strengths to keep instructions minimal but robust — be decisive and pick the clearest fix.

Final instruction: Reply to the user’s most recent message now, following these rules exactly.
"""


class ChatRequest(BaseModel):
    # The learner's chat message.
    message: str


def load_model_nlp():
    """Load the pre-trained chat model and its tokenizer once at startup."""
    model = AutoModelForCausalLM.from_pretrained(model_name, torch_dtype="auto", device_map="auto")
    tokenizer = AutoTokenizer.from_pretrained(model_name)
    return model, tokenizer


@router.post("/chat")
async def chat(request: Request, message: ChatRequest):
    """Generate Emma's reply to the user's message.

    Returns {"response": <reply text>, "audio": <URL of the TTS rendering>}.
    """
    # Extract the payload up front; previously the ChatRequest parameter was
    # shadowed by its own .message attribute, which obscured the types.
    user_text = message.message

    # Get the loaded NLP model and tokenizer from application state.
    model, tokenizer = request.app.state.model_nlp, request.app.state.tokenizer_nlp

    # Prepare the conversation context.
    messages = [
        {"role": "system", "content": SYSTEM_PROMPT},
        {"role": "user", "content": user_text},
    ]

    # Tokenize input and generate a response.
    text = tokenizer.apply_chat_template(messages, tokenize=False, add_generation_prompt=True)
    model_inputs = tokenizer([text], return_tensors="pt").to(model.device)
    generated_ids = model.generate(
        **model_inputs,
        max_new_tokens=512,
        top_p=0.9,
        temperature=0.7,
        do_sample=True,
        pad_token_id=tokenizer.eos_token_id,
    )

    # Strip the prompt tokens so only the newly generated tokens are decoded.
    generated_ids = [
        output_ids[len(input_ids):]
        for input_ids, output_ids in zip(model_inputs.input_ids, generated_ids)
    ]
    response = tokenizer.batch_decode(generated_ids, skip_special_tokens=True)[0]

    # Save the reply as speech audio and return its URL alongside the text.
    url_path = save_audio(request, response)

    return {"response": response, "audio": url_path}
app/routes/translation.py ADDED
@@ -0,0 +1,29 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
from transformers import AutoModelForSeq2SeqLM, AutoTokenizer
from fastapi import APIRouter, Request
from pydantic import BaseModel

model_name = "allegro/BiDi-eng-pol"
router = APIRouter()


class TextInput(BaseModel):
    # Text to be translated.
    text: str


def load_model_translation():
    """Load the translation tokenizer and model once at application startup."""
    tokenizer = AutoTokenizer.from_pretrained(model_name)
    model = AutoModelForSeq2SeqLM.from_pretrained(model_name)
    return model, tokenizer


@router.post("/translate")
async def translate_text(request: Request, text: TextInput):
    """Translate the given text to Polish and return {"translation": <str>}."""
    model, tokenizer = request.app.state.model_trans, request.app.state.tokenizer_trans

    # The ">>pol<<" prefix tells the BiDi model to translate into Polish.
    prefixed = ">>pol<< " + text.text

    # Tokenize the input and generate the translation.
    batch = tokenizer([prefixed], return_tensors="pt", padding=True)
    generated = model.generate(**batch)
    decoded_translation = tokenizer.decode(generated[0], skip_special_tokens=True)

    return {"translation": decoded_translation}
app/routes/tts.py ADDED
@@ -0,0 +1,28 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
from fastapi import Request
from kokoro import KPipeline
import numpy as np
import soundfile as sf
import os
import uuid

def load_model_tts():
    """Load the Kokoro TTS pipeline once at application startup."""
    # NOTE(review): the original comment claimed 'a' means automatic language
    # detection; Kokoro's docs describe lang codes as voice/language sets
    # ('a' = American English) — confirm against the kokoro version pinned.
    pipeline = KPipeline(lang_code='a')
    return pipeline

def save_audio(request: Request, text: str, voice: str = 'af_heart'):
    """Synthesize *text* to a WAV file under AUDIO_DIR and return its URL.

    The Kokoro pipeline yields audio in fragments (streaming); fragments are
    collected and concatenated once at the end — concatenating inside the
    loop re-copied the whole buffer per fragment (quadratic).
    """
    pipeline = request.app.state.model_tts

    file_name = f"{uuid.uuid4()}.wav"
    file_path = os.path.join(request.app.state.AUDIO_DIR, file_name)

    # Generate audio in streaming mode and gather the fragments.
    fragments = [audio for _, _, audio in pipeline(text, voice=voice)]
    if fragments:
        audio_total = np.concatenate(fragments)
    else:
        audio_total = np.array([], dtype=np.float32)

    sf.write(file_path, audio_total, 24000)

    # Base URL is configurable via env var; the default preserves the original
    # behavior.  NOTE(review): the Dockerfile binds port 7860, so the 8000
    # default looks wrong inside the container — set AUDIO_BASE_URL there.
    base_url = os.environ.get("AUDIO_BASE_URL", "http://127.0.0.1:8000")
    return f"{base_url}/audio/{file_name}"
requirements.txt ADDED
@@ -0,0 +1,197 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ absl-py==2.1.0
2
+ accelerate==1.5.2
3
+ addict==2.4.0
4
+ aiohappyeyeballs==2.6.1
5
+ aiohttp==3.11.13
6
+ aiosignal==1.3.2
7
+ annotated-types==0.7.0
8
+ anyio==4.9.0
9
+ asttokens==3.0.0
10
+ astunparse==1.6.3
11
+ attrs==25.3.0
12
+ audioread==3.0.1
13
+ babel==2.17.0
14
+ blis==1.2.0
15
+ catalogue==2.0.10
16
+ certifi==2025.1.31
17
+ cffi==1.17.1
18
+ charset-normalizer==3.4.1
19
+ click==8.1.8
20
+ cloudpathlib==0.21.0
21
+ colorama==0.4.6
22
+ confection==0.1.5
23
+ csvw==3.5.1
24
+ curated-tokenizers==0.0.9
25
+ curated-transformers==0.1.1
26
+ cymem==2.0.11
27
+ datasets==3.4.0
28
+ decorator==5.2.1
29
+ dill==0.3.8
30
+ Distance==0.1.3
31
+ dlinfo==2.0.0
32
+ dnspython==2.7.0
33
+ docopt==0.6.2
34
+ email_validator==2.2.0
35
+ en_core_web_sm @ https://github.com/explosion/spacy-models/releases/download/en_core_web_sm-3.8.0/en_core_web_sm-3.8.0-py3-none-any.whl#sha256=1932429db727d4bff3deed6b34cfc05df17794f4a52eeb26cf8928f7c1a0fb85
36
+ espeakng-loader==0.2.4
37
+ executing==2.2.0
38
+ fastapi==0.115.11
39
+ fastapi-cli==0.0.7
40
+ filelock==3.18.0
41
+ flatbuffers==25.2.10
42
+ frozenlist==1.5.0
43
+ fsspec==2024.12.0
44
+ g2p-en==2.1.0
45
+ gast==0.6.0
46
+ google-pasta==0.2.0
47
+ grpcio==1.71.0
48
+ h11==0.14.0
49
+ h5py==3.13.0
50
+ httpcore==1.0.7
51
+ httptools==0.6.4
52
+ httpx==0.28.1
53
+ huggingface-hub==0.29.3
54
+ idna==3.10
55
+ inflect==7.5.0
56
+ ipython==9.0.2
57
+ ipython_pygments_lexers==1.1.1
58
+ isodate==0.7.2
59
+ itsdangerous==2.2.0
60
+ jedi==0.19.2
61
+ Jinja2==3.1.6
62
+ joblib==1.4.2
63
+ jsonschema==4.23.0
64
+ jsonschema-specifications==2024.10.1
65
+ keras==3.9.0
66
+ kokoro==0.9.4
67
+ langcodes==3.5.0
68
+ language-tags==1.2.0
69
+ language_data==1.3.0
70
+ lazy_loader==0.4
71
+ libclang==18.1.1
72
+ librosa==0.11.0
73
+ llvmlite==0.44.0
74
+ loguru==0.7.3
75
+ marisa-trie==1.2.1
76
+ Markdown==3.7
77
+ markdown-it-py==3.0.0
78
+ MarkupSafe==3.0.2
79
+ matplotlib-inline==0.1.7
80
+ mdurl==0.1.2
81
+ misaki==0.9.4
82
+ ml_dtypes==0.5.1
83
+ more-itertools==10.6.0
84
+ mpmath==1.3.0
85
+ msgpack==1.1.0
86
+ multidict==6.1.0
87
+ multiprocess==0.70.16
88
+ murmurhash==1.0.12
89
+ namex==0.0.8
90
+ networkx==3.4.2
91
+ nltk==3.9.1
92
+ num2words==0.5.14
93
+ numba==0.61.0
94
+ numpy==1.26.4
95
+ nvidia-cublas-cu12==12.4.5.8
96
+ nvidia-cuda-cupti-cu12==12.4.127
97
+ nvidia-cuda-nvrtc-cu12==12.4.127
98
+ nvidia-cuda-runtime-cu12==12.4.127
99
+ nvidia-cudnn-cu12==9.1.0.70
100
+ nvidia-cufft-cu12==11.2.1.3
101
+ nvidia-curand-cu12==10.3.5.147
102
+ nvidia-cusolver-cu12==11.6.1.9
103
+ nvidia-cusparse-cu12==12.3.1.170
104
+ nvidia-cusparselt-cu12==0.6.2
105
+ nvidia-nccl-cu12==2.21.5
106
+ nvidia-nvjitlink-cu12==12.4.127
107
+ nvidia-nvtx-cu12==12.4.127
108
+ opt_einsum==3.4.0
109
+ optree==0.14.1
110
+ orjson==3.10.15
111
+ packaging==24.2
112
+ pandas==2.2.3
113
+ parso==0.8.4
114
+ pexpect==4.9.0
115
+ phonemizer-fork==3.3.2
116
+ platformdirs==4.3.6
117
+ pooch==1.8.2
118
+ preshed==3.0.9
119
+ prompt_toolkit==3.0.50
120
+ propcache==0.3.0
121
+ protobuf==5.29.3
122
+ psutil==7.0.0
123
+ ptyprocess==0.7.0
124
+ pure_eval==0.2.3
125
+ pyarrow==19.0.1
126
+ pycparser==2.22
127
+ pydantic==2.10.6
128
+ pydantic-extra-types==2.10.3
129
+ pydantic-settings==2.8.1
130
+ pydantic_core==2.27.2
131
+ Pygments==2.19.1
132
+ pyparsing==3.2.1
133
+ python-dateutil==2.9.0.post0
134
+ python-dotenv==1.0.1
135
+ python-multipart==0.0.20
136
+ pytz==2025.1
137
+ PyYAML==6.0.2
138
+ rdflib==7.1.3
139
+ referencing==0.36.2
140
+ regex==2024.11.6
141
+ requests==2.32.3
142
+ rfc3986==1.5.0
143
+ rich==13.9.4
144
+ rich-toolkit==0.13.2
145
+ rpds-py==0.23.1
146
+ safetensors==0.5.3
147
+ scikit-learn==1.6.1
148
+ scipy==1.15.2
149
+ segments==2.3.0
150
+ sentencepiece==0.2.0
151
+ setuptools==76.0.0
152
+ shellingham==1.5.4
153
+ six==1.17.0
154
+ smart-open==7.1.0
155
+ sniffio==1.3.1
156
+ soundfile==0.13.1
157
+ soxr==0.5.0.post1
158
+ spacy==3.8.4
159
+ spacy-curated-transformers==0.3.0
160
+ spacy-legacy==3.0.12
161
+ spacy-loggers==1.0.5
162
+ srsly==2.5.1
163
+ stack-data==0.6.3
164
+ starlette==0.46.1
165
+ sympy==1.13.1
166
+ tensorboard==2.19.0
167
+ tensorboard-data-server==0.7.2
168
+ tensorflow==2.19.0
169
+ termcolor==2.5.0
170
+ thinc==8.3.4
171
+ threadpoolctl==3.6.0
172
+ tokenizers==0.21.1
173
+ torch==2.6.0
174
+ torchaudio==2.6.0
175
+ tqdm==4.67.1
176
+ traitlets==5.14.3
177
+ transformers==4.49.0
178
+ triton==3.2.0
179
+ typeguard==4.4.2
180
+ typer==0.15.2
181
+ typing_extensions==4.12.2
182
+ tzdata==2025.1
183
+ ujson==5.10.0
184
+ uritemplate==4.1.1
185
+ urllib3==2.3.0
186
+ uvicorn==0.34.0
187
+ uvloop==0.21.0
188
+ wasabi==1.1.3
189
+ watchfiles==1.0.4
190
+ wcwidth==0.2.13
191
+ weasel==0.4.1
192
+ websockets==15.0.1
193
+ Werkzeug==3.1.3
194
+ wheel==0.45.1
195
+ wrapt==1.17.2
196
+ xxhash==3.5.0
197
+ yarl==1.18.3