dalager committed on
Commit
57eaa54
·
1 Parent(s): fff1932

Endpoint testing with js or python

Browse files

Samme test script i hhv
- python med deps i requirements.txt
- js, uden eksterne dependencies, så kan køres med node cli direkte

hf_endpoint_testing/hf_endpoint_client.js ADDED
@@ -0,0 +1,151 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ const fs = require("fs");
2
+ const path = require("path");
3
+ const https = require("https");
4
+
5
// Endpoint URL and access token are both taken from the environment.
const { HF_ENDPOINT: API_URL, HF_TOKEN } = process.env;

// Abort on the first missing setting; the token is checked before the URL.
const requiredSettings = [
  [HF_TOKEN, "Error: HF_TOKEN environment variable is not set."],
  [API_URL, "Error: HF_ENDPOINT environment variable is not set."],
];
for (const [value, complaint] of requiredSettings) {
  if (!value) {
    console.error(complaint);
    process.exit(1);
  }
}

// Base headers sent with every request: JSON in, JSON out, bearer auth.
const headers = {
  Accept: "application/json",
  Authorization: "Bearer " + HF_TOKEN,
  "Content-Type": "application/json",
};
23
+
24
/**
 * Send an audio file to the endpoint, print the transcript, and save a report.
 *
 * The report is written relative to the current working directory as
 * `<basename of filename>_<timestamp>.txt`, where the timestamp comes from
 * `Date#toISOString()` (UTC) with colons replaced by underscores. It contains
 * the request parameters followed by the raw JSON response.
 *
 * The process exits with status 1 when the file is missing, when the response
 * carries no `text` field, or when the request fails.
 *
 * @param {string} filename - Path of the audio file to transcribe.
 * @param {object|null} [params=null] - Endpoint parameters; when falsy, the
 *   built-in defaults below are used.
 * @returns {Promise<void>} Settles once the report has been written.
 */
function transcribe(filename, params = null) {
  if (!fs.existsSync(filename)) {
    console.error(`Error: File '${filename}' does not exist.`);
    process.exit(1);
  }

  const parameters = params || {
    word_timestamps: false,
    temperature: [0.0, 0.1, 0.3, 0.5, 0.7, 0.9, 1],
    repetition_penalty: 1.2,
    log_prob_threshold: -1.0,
  };

  return callEndpoint(filename, parameters)
    .then((response) => {
      // Guard against a JSON body that is not an object (e.g. `null`).
      const txt =
        response !== null && typeof response === "object"
          ? response.text
          : undefined;

      // Only a *missing* text is an error. An empty string is a valid result,
      // which matches the `is None` check in the Python client.
      if (txt === undefined || txt === null) {
        console.error("Error: No text returned");
        console.error(response);
        process.exit(1);
      }

      console.log(txt);

      // Colons are replaced so the timestamp is usable in a file name.
      const isodate = new Date().toISOString().replace(/:/g, "_").split(".")[0];
      const resultfile = `${path.basename(filename)}_${isodate}.txt`;

      const report =
        `----- PARAMETERS -----\n${JSON.stringify(parameters, null, 2)}` +
        `\n----- RESULT -----\n${JSON.stringify(response, null, 2)}\n`;
      fs.writeFileSync(resultfile, report, "utf-8");
    })
    .catch((error) => {
      console.error(`Error during transcription: ${error.message}`);
      process.exit(1);
    });
}
67
+
68
/**
 * POST an audio file to the inference endpoint and resolve with its JSON reply.
 *
 * The file is read fully into memory, base64-encoded, and sent as a
 * `data:audio/wav;base64,...` URI in the `inputs` field, together with
 * `language: "da"` and the supplied parameters. The request always goes over
 * HTTPS to the host, port, path, and query string of `API_URL`.
 *
 * The process exits with status 1 if the file does not exist or if `API_URL`
 * is not a parseable URL.
 *
 * @param {string} filename - Path of the audio file to upload.
 * @param {object} params - Value sent as the `parameters` field of the payload.
 * @returns {Promise<any>} Resolves with the parsed JSON response body; rejects
 *   on a network error or when the body is not valid JSON.
 */
function callEndpoint(filename, params) {
  if (!fs.existsSync(filename)) {
    console.error(`Error: File '${filename}' does not exist.`);
    process.exit(1);
  }

  const base64Data = fs.readFileSync(filename).toString("base64");
  const jsonData = JSON.stringify({
    inputs: `data:audio/wav;base64,${base64Data}`,
    language: "da",
    parameters: params,
  });

  let url;
  try {
    url = new URL(API_URL);
  } catch (e) {
    console.error(
      `Error: Invalid API_URL '${API_URL}'. It should be a complete URL.`
    );
    process.exit(1);
  }

  const options = {
    hostname: url.hostname,
    // Honour an explicit port in the URL; fall back to the HTTPS default.
    port: url.port || 443,
    // Keep the query string, which `pathname` alone would drop.
    path: `${url.pathname}${url.search}`,
    method: "POST",
    headers: {
      ...headers,
      // Content-Length is a byte count. `String#length` counts UTF-16 code
      // units and is too small as soon as the payload has non-ASCII text.
      "Content-Length": Buffer.byteLength(jsonData),
    },
  };

  return new Promise((resolve, reject) => {
    const req = https.request(options, (res) => {
      // Collect raw buffers and decode once at the end, so a multi-byte
      // character split across two chunks is not corrupted.
      const chunks = [];
      res.on("data", (chunk) => {
        chunks.push(chunk);
      });
      res.on("error", reject);
      res.on("end", () => {
        const body = Buffer.concat(chunks).toString("utf8");
        try {
          resolve(JSON.parse(body));
        } catch (parseError) {
          // Throwing inside this callback would crash the process instead of
          // reaching the caller's rejection handler.
          reject(
            new Error(
              `Endpoint returned a non-JSON response (HTTP ${res.statusCode}): ` +
                body.slice(0, 200)
            )
          );
        }
      });
    });

    req.on("error", (error) => {
      console.error(`Error: ${error.message}`);
      reject(error);
    });

    req.write(jsonData);
    req.end();
  });
}
130
+
131
// Command-line entry point: `node <script> transcribe <filename>`.
const scriptName = path.basename(process.argv[1]);
const [command, filename] = process.argv.slice(2);

// No command at all: show the generic usage line.
if (command === undefined) {
  console.error(`Usage: node ${scriptName} <command> <filename>`);
  process.exit(1);
}

if (command !== "transcribe") {
  // `transcribe` is the only command this script knows.
  console.error(`Unknown command. Supported commands: transcribe`);
  process.exit(1);
} else if (!filename) {
  console.error(`Usage: node ${scriptName} transcribe <filename>`);
  process.exit(1);
} else {
  transcribe(filename);
}
hf_endpoint_testing/hf_endpoint_client.py ADDED
@@ -0,0 +1,138 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import base64
2
+ import datetime
3
+ import json
4
+ import sys
5
+ import requests
6
+
7
+ # filesystem and process helpers (os for paths, subprocess for running ffmpeg)
8
+ import os
9
+ import glob
10
+ import re
11
+ import subprocess
12
+
13
+ # load dotenv
14
+ from dotenv import load_dotenv
15
+
16
+ load_dotenv()
17
+
18
+
19
# Endpoint URL; a placeholder host is used when HF_ENDPOINT is not set.
API_URL = os.environ.get(
    "HF_ENDPOINT",
    "XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX.endpoints.huggingface.cloud",
)

# Headers sent with every request: JSON in, JSON out, bearer token from HF_TOKEN.
headers = dict(
    [
        ("Accept", "application/json"),
        ("Authorization", "Bearer {}".format(os.environ.get("HF_TOKEN"))),
        ("Content-Type", "application/json"),
    ]
)
27
+
28
+
29
def transcribe(filename, params=None):
    """Transcribe ``filename`` via the endpoint, print the text, save a report.

    The report is written relative to the current working directory as
    ``<basename of filename>_<local timestamp>.txt`` (the file extension is
    kept in the name) and holds the parameters followed by the raw JSON result.

    Args:
        filename: Path of the audio file to send.
        params: Endpoint parameters; when falsy, the defaults below are used.

    Exits the process with status 1 when the result has no ``"text"`` entry.
    """
    if params:
        parameters = params
    else:
        parameters = {
            "word_timestamps": False,
            # "initial_prompt": "",
            "temperature": [0.0, 0.1, 0.3, 0.5, 0.7, 0.9, 1],
            "repetition_penalty": 1.2,
            "log_prob_threshold": -1.0,
        }

    result = call_endpoint(filename, parameters)
    txt = result.get("text")
    if txt is None:
        print("Error: No text returned")
        print(result)
        sys.exit(1)

    print(txt)

    # Second-resolution local timestamp, with "_" instead of ":" so it is
    # usable in a file name.
    stamp = datetime.datetime.now().strftime("%Y-%m-%dT%H_%M_%S")
    resultfile = "{}_{}.txt".format(os.path.basename(filename), stamp)

    with open(resultfile, "w", encoding="utf-8") as f:
        f.writelines(
            [
                "----- PARAMETERS -----\n",
                json.dumps(parameters, indent=2, ensure_ascii=False),
                "\n----- RESULT -----\n",
                json.dumps(result, indent=2, ensure_ascii=False),
                "\n",
            ]
        )
59
+
60
+
61
def call_endpoint(filename, params=None):
    """POST an audio file to the endpoint and return the decoded JSON reply.

    The file is read fully into memory, base64-encoded, and sent as a
    ``data:audio/wav;base64,...`` URI in the ``inputs`` field together with
    ``language="da"`` and the parameters. The parameters, target URL, and
    elapsed request time are printed to stdout.

    Args:
        filename: Path of the audio file to upload.
        params: Value for the payload's ``parameters`` field; when ``None``,
            the fallback set below is used.

    Returns:
        The response body parsed as JSON.

    Raises:
        OSError: If ``filename`` cannot be read.
        ValueError: If the response body is not valid JSON.
    """
    with open(filename, "rb") as f:
        audio_b64 = base64.b64encode(f.read()).decode("ascii")

    if params is None:
        parameters = {
            "initial_prompt": "Her går det godt når vi taler om det",
            "word_timestamps": False,
            "vad_parameters": dict(min_silence_duration_ms=500),
        }
    else:
        parameters = params

    print(f"Parameters: {parameters}")
    payload = {
        # RFC 2397 data URI: the ";base64" marker precedes the comma that
        # starts the data. This is also the form the JavaScript client sends.
        "inputs": f"data:audio/wav;base64,{audio_b64}",
        "language": "da",
        "parameters": parameters,
    }
    json_data = json.dumps(payload)

    started = datetime.datetime.now()
    print(f"Sending request to {API_URL}...")
    response = requests.post(API_URL, headers=headers, data=json_data)
    elapsed_time = datetime.datetime.now() - started
    print(f"Elapsed time: {elapsed_time}")

    try:
        return response.json()
    except ValueError as exc:
        # Surface the status and the start of the body; a bare decode error
        # gives no hint about what the endpoint actually returned.
        raise ValueError(
            f"Endpoint returned a non-JSON response "
            f"(HTTP {response.status_code}): {response.text[:200]!r}"
        ) from exc
92
+
93
+
94
def convert(filename):
    """Convert an audio file to 16 kHz mono 16-bit PCM WAV using ffmpeg.

    The output is written next to the input as ``<name without extension>_16k.wav``.
    Only the final extension of the path is replaced, so directory names that
    contain ``.wav`` are left alone and non-``.wav`` inputs get their own
    output file. If the output file already exists, ffmpeg is not run.

    Args:
        filename: Path of the audio file to convert.

    Returns:
        Path of the converted (or already existing) 16 kHz file.

    Raises:
        subprocess.CalledProcessError: If ffmpeg exits with a non-zero status.
        FileNotFoundError: If the ``ffmpeg`` executable cannot be found.
    """
    root, _ext = os.path.splitext(filename)
    output_file = f"{root}_16k.wav"

    if os.path.exists(output_file):
        print(f"{output_file} already exists, skipping conversion.")
        return output_file

    print(f"Converting {filename} to 16khz mono --> {output_file}...")
    subprocess.run(
        [
            "ffmpeg",
            "-i",
            filename,
            "-ar",
            "16000",
            "-ac",
            "1",
            "-c:a",
            "pcm_s16le",
            output_file,
        ],
        check=True,
    )
    return output_file
116
+
117
+
118
# cli


def _main(argv):
    """Run the command-line interface and return the process exit status.

    Usage: ``python <script> <convert|transcribe> <filename>``.

    Args:
        argv: Full argument vector, including the script path at index 0.

    Returns:
        0 on success, 1 when the command or the filename is missing or the
        command is unknown.
    """
    script = os.path.basename(argv[0])
    commands = {"convert": convert, "transcribe": transcribe}

    # Check the length before indexing so a bare invocation shows usage
    # instead of raising IndexError.
    command = argv[1] if len(argv) > 1 else None
    if command not in commands:
        print(
            f"Usage: python {script} <convert|transcribe> <filename>",
            file=sys.stderr,
        )
        return 1

    # argv[2] is the filename, so at least three entries are required.
    if len(argv) < 3:
        print(f"Usage: python {script} {command} <filename>", file=sys.stderr)
        return 1

    # Both commands print their own progress and results; their return value
    # is not echoed (transcribe returns None).
    commands[command](argv[2])
    return 0


if __name__ == "__main__":
    sys.exit(_main(sys.argv))
hf_endpoint_testing/requirements.txt ADDED
@@ -0,0 +1,11 @@
 
 
 
 
 
 
 
 
 
 
 
 
1
+ certifi==2025.1.31
2
+ cffi==1.17.1
3
+ charset-normalizer==3.4.1
4
+ dotenv==0.9.9
5
+ idna==3.10
6
+ numpy==2.2.4
7
+ pycparser==2.22
8
+ python-dotenv==1.1.0
9
+ requests==2.32.3
10
+ soundfile==0.13.1
11
+ urllib3==2.3.0