Commit 191f3b0 by VenkateshRoshan
1 Parent(s): 5eec0c8

Dockerfile Updated

Files changed:
- app.py                +55 -22
- automatic_deployer.py  +7  -0
- dockerfile            +11  -3
- requirements.txt       +2  -1
app.py
CHANGED

@@ -6,17 +6,27 @@ from transformers import pipeline
 from huggingface_hub import InferenceClient
 import time
 import psutil
+from prometheus_client import start_http_server, Summary, Counter, Gauge
 # import torch
 # import numpy as np
 
 # Ensure CUDA is available and set device accordingly
 # device = 0 if torch.cuda.is_available() else -1
 
+# Initialize Prometheus metrics
+REQUEST_COUNT = Counter("transcription_requests_total", "Total transcription requests", ["method"])
+REQUEST_DURATION = Summary("transcription_request_duration_seconds", "Duration of transcription requests in seconds", ["method"])
+MEMORY_USAGE = Gauge("transcription_memory_usage_bytes", "Memory used by the transcription function")
+RAM_USAGE_PERCENTAGE = Gauge("ram_usage_percentage", "Percentage of total RAM used by the transcription function")
+
+# Start the Prometheus HTTP server to expose metrics
+start_http_server(8000)  # Port 8000 is the standard for Prometheus metrics
+
 model_id = "openai/whisper-small"
 client = InferenceClient(model_id,token=os.getenv('HF_TOKEN'))
 pipe = pipeline("automatic-speech-recognition", model=model_id) #, device=device)
 
-print(f'The Server is Running !!!')
+print(f'The Server is Running with prometheus Metrics enabled !!!')
 
 def transcribe(inputs, use_api):
     start = time.time()
@@ -28,32 +38,55 @@ def transcribe(inputs, use_api):
         raise gr.Error("No audio file submitted! Please upload or record an audio file before submitting your request.")
 
     try:
-        if use_api:
-            print(f'Using API for transcription...')
-            API_STATUS = 'Using API it took: '
-            # Use InferenceClient (API) if checkbox is checked
-            res = client.automatic_speech_recognition(inputs).text
-        else:
-            print(f'Using local pipeline for transcription...')
-            # Use local pipeline if checkbox is unchecked
-            API_STATUS = 'Using local pipeline it took: '
-            res = pipe(inputs, chunk_length_s=30)["text"]
+        # if use_api:
+        #     print(f'Using API for transcription...')
+        #     API_STATUS = 'Using API it took: '
+        #     # Use InferenceClient (API) if checkbox is checked
+        #     res = client.automatic_speech_recognition(inputs).text
+        # else:
+        #     print(f'Using local pipeline for transcription...')
+        #     # Use local pipeline if checkbox is unchecked
+        #     API_STATUS = 'Using local pipeline it took: '
+        #     res = pipe(inputs, chunk_length_s=30)["text"]
 
-        end = time.time() - start
+        # end = time.time() - start
 
+        # # Measure memory after running the transcription process
+        # memory_after = psutil.Process(os.getpid()).memory_info().rss
+
+        # # Calculate the difference to see how much memory was used by the code
+        # memory_used = memory_after - memory_before  # Memory used in bytes
+        # memory_used_gb = round(memory_used / (1024 ** 3), 2)  # Convert memory used to GB
+        # total_memory_gb = round(psutil.virtual_memory().total / (1024 ** 3), 2)  # Total RAM in GB
+
+        # # Calculate the percentage of RAM used by this process
+        # memory_used_percent = round((memory_used / psutil.virtual_memory().total) * 100, 2)
+
+        # return res, API_STATUS + str(round(end, 2)) + ' seconds', f"RAM Used by code: {memory_used_gb} GB ({memory_used_percent}%) Total RAM: {total_memory_gb} GB"
+        method = 'API' if use_api else 'Local Pipeline'
+
+        # Start timing for Prometheus
+        with REQUEST_DURATION.labels(method=method).time():
+            REQUEST_COUNT.labels(method=method).inc()  # Increment the request counter
+
+            # Transcription
+            if use_api:
+                print(f'Using API for transcription...')
+                res = client.automatic_speech_recognition(inputs).text
+            else:
+                print(f'Using local pipeline for transcription...')
+                res = pipe(inputs, chunk_length_s=30)["text"]
+
         # Measure memory after running the transcription process
        memory_after = psutil.Process(os.getpid()).memory_info().rss
+        memory_used = memory_after - memory_before
+        MEMORY_USAGE.set(memory_used)  # Set memory usage in bytes
 
-        # Calculate the difference to see how much memory was used by the code
-        memory_used = memory_after - memory_before  # Memory used in bytes
-        memory_used_gb = round(memory_used / (1024 ** 3), 2)  # Convert memory used to GB
-        total_memory_gb = round(psutil.virtual_memory().total / (1024 ** 3), 2)  # Total RAM in GB
-
-        # Calculate the percentage of RAM used by this process
-        memory_used_percent = round((memory_used / psutil.virtual_memory().total) * 100, 2)
+        total_memory_percent = psutil.virtual_memory().percent
+        RAM_USAGE_PERCENTAGE.set(total_memory_percent)  # Set RAM usage as a percentage
 
-        return res, API_STATUS + str(round(end, 2)) + ' seconds', f"RAM Used by code: {memory_used_gb} GB ({memory_used_percent}%) Total RAM: {total_memory_gb} GB"
+        end = time.time() - start
+        return res, f"{method} took: {round(end, 2)} seconds", f"RAM Used by code: {memory_used / (1024 ** 3):.2f} GB ({total_memory_percent}%)"
 
     except Exception as e:
         return fr'Error: {str(e)}', None, None
@@ -107,4 +140,4 @@ with demo:
 # # time_taken = gr.Textbox(label="Time taken", type="text") # Time taken outside the interfaces
 
 if __name__ == "__main__":
-    demo.queue().launch()
+    demo.queue().launch(server_name="0.0.0.0", server_port=7860)
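With this change, transcribe() records a per-method request Counter and duration Summary plus two Gauges (process memory and system RAM percentage), all served over HTTP by prometheus_client's start_http_server(8000). A minimal sketch of reading those metrics from outside the app; the localhost:8000 address is an assumption that only holds if that port is actually published from the container:

import urllib.request

# Fetch the plain-text metrics page served by start_http_server(8000)
# (hypothetical address; depends on how the container's ports are mapped)
with urllib.request.urlopen("http://localhost:8000/metrics") as resp:
    body = resp.read().decode()

# Show only the metrics defined in app.py
for line in body.splitlines():
    if line.startswith(("transcription_", "ram_usage_percentage")):
        print(line)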
automatic_deployer.py
CHANGED

@@ -1,5 +1,6 @@
 import subprocess
 import os
+import socket
 import time
 
 PORT=22013
@@ -19,6 +20,12 @@ def deploy():
     except subprocess.CalledProcessError as e:
         print(f"Error occurred while deploying: {e.stderr.decode()}")
 
+def is_port_busy(host, port):
+    """Check if the specified port on the host is busy."""
+    with socket.socket(socket.AF_INET, socket.SOCK_STREAM) as sock:
+        sock.settimeout(1)  # 1 second timeout
+        return sock.connect_ex((host, port)) == 0  # Returns True if port is busy
+
 def checkStatus(HOST, PORT):
     print(f'Checking the status of the app...')
     try:
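The new is_port_busy() helper is a plain TCP connect probe: socket.connect_ex() returns 0 when something accepts the connection, so True means the port is in use. A small usage sketch; the host value and the polling loop are illustrative assumptions, not part of the commit:

import time
from automatic_deployer import is_port_busy

HOST = "127.0.0.1"  # hypothetical target host
PORT = 22013        # matches the PORT constant in automatic_deployer.py

# Poll for up to ~30 seconds until the deployed app accepts connections
for _ in range(30):
    if is_port_busy(HOST, PORT):
        print("App is up and listening.")
        break
    time.sleep(1)
else:
    print("App never came up; a redeploy may be needed.")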
dockerfile
CHANGED

@@ -9,6 +9,7 @@ WORKDIR /app
 # Install FFmpeg and other dependencies
 RUN apt-get update
 RUN apt-get install -y ffmpeg
+RUN apt-get install -y prometheus-node-exporter
 RUN apt-get clean
 
 # Copy the current directory contents into the container at /app
@@ -17,8 +18,15 @@ COPY . /app
 # Install any needed packages specified in requirements.txt
 RUN pip install --no-cache-dir -r requirements.txt
 
-# Make port
-EXPOSE
+# Make port 7860 available to the world outside this container
+EXPOSE 7860
+# Prometheus Node Exporter metrics
+EXPOSE 25561
+# Prometheus Python app metrics
+EXPOSE 25562
+
 
 # Run app.py when the container launches
-CMD ["python", "app.py"]
+# CMD ["python", "app.py"]
+# Run both the Node Exporter and the Gradio application
+CMD ["sh", "-c", "prometheus-node-exporter & python app.py"]
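Note that the app's metrics server binds port 8000 (start_http_server(8000) in app.py) and prometheus-node-exporter listens on 9100 by default, while the Dockerfile exposes 25561 and 25562, so the exposed ports only line up if the exporters are remapped somewhere outside this diff. A hedged sketch of publishing the ports the processes bind by default, in the same subprocess style automatic_deployer.py already uses; the container name and image tag are placeholders:

import subprocess

# Publish the ports the container processes listen on by default:
# 7860 (Gradio), 8000 (prometheus_client), 9100 (node exporter).
subprocess.run([
    "docker", "run", "-d",
    "--name", "asr-app",   # placeholder container name
    "-p", "7860:7860",     # Gradio UI
    "-p", "8000:8000",     # app metrics from start_http_server(8000)
    "-p", "9100:9100",     # prometheus-node-exporter default port
    "asr-image:latest",    # placeholder image tag
], check=True)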
requirements.txt
CHANGED

@@ -5,4 +5,5 @@ huggingface_hub
 pytest
 gradio
 ffmpeg
-psutil
+psutil
+prometheus_client