Spaces:
Sleeping
Sleeping
Update Demo.py
Browse files
Demo.py
CHANGED
|
@@ -65,21 +65,13 @@ def create_pipeline(model):
|
|
| 65 |
def fit_data(pipeline, fed_data):
|
| 66 |
"""Fit the data into the pipeline and return the transcription."""
|
| 67 |
data, sampling_rate = librosa.load(fed_data, sr=16000)
|
| 68 |
-
data =
|
| 69 |
-
|
| 70 |
-
schema = StructType([
|
| 71 |
-
StructField("audio_content", ArrayType(FloatType())),
|
| 72 |
-
StructField("sampling_rate", LongType())
|
| 73 |
-
])
|
| 74 |
-
|
| 75 |
-
df = pd.DataFrame({
|
| 76 |
-
"audio_content": [data],
|
| 77 |
-
"sampling_rate": [sampling_rate]
|
| 78 |
-
})
|
| 79 |
|
| 80 |
-
|
| 81 |
-
|
| 82 |
-
|
|
|
|
| 83 |
|
| 84 |
def save_uploadedfile(uploadedfile, path):
|
| 85 |
"""Save the uploaded file to the specified path."""
|
|
@@ -119,7 +111,7 @@ st.sidebar.markdown("""
|
|
| 119 |
""", unsafe_allow_html=True)
|
| 120 |
|
| 121 |
# Load examples
|
| 122 |
-
AUDIO_FILE_PATH = "inputs"
|
| 123 |
audio_files = sorted(os.listdir(AUDIO_FILE_PATH))
|
| 124 |
|
| 125 |
selected_audio = st.selectbox("Select an audio", audio_files)
|
|
@@ -146,4 +138,4 @@ pipeline = create_pipeline(model)
|
|
| 146 |
output = fit_data(pipeline, selected_audio)
|
| 147 |
|
| 148 |
st.subheader(f"Transcription:")
|
| 149 |
-
st.markdown(f"
|
|
|
|
| 65 |
def fit_data(pipeline, fed_data):
|
| 66 |
"""Fit the data into the pipeline and return the transcription."""
|
| 67 |
data, sampling_rate = librosa.load(fed_data, sr=16000)
|
| 68 |
+
data = data.tolist()
|
| 69 |
+
spark_df = spark.createDataFrame([[data]], ["audio_content"])
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 70 |
|
| 71 |
+
model = pipeline.fit(spark_df)
|
| 72 |
+
lp = LightPipeline(model)
|
| 73 |
+
lp_result = lp.fullAnnotate(data)[0]
|
| 74 |
+
return lp_result
|
| 75 |
|
| 76 |
def save_uploadedfile(uploadedfile, path):
|
| 77 |
"""Save the uploaded file to the specified path."""
|
|
|
|
| 111 |
""", unsafe_allow_html=True)
|
| 112 |
|
| 113 |
# Load examples
|
| 114 |
+
AUDIO_FILE_PATH = "/content/Wav2Vec2ForCTC/inputs"
|
| 115 |
audio_files = sorted(os.listdir(AUDIO_FILE_PATH))
|
| 116 |
|
| 117 |
selected_audio = st.selectbox("Select an audio", audio_files)
|
|
|
|
| 138 |
output = fit_data(pipeline, selected_audio)
|
| 139 |
|
| 140 |
st.subheader(f"Transcription:")
|
| 141 |
+
st.markdown(f"{(output['text'][0].result).title()}")
|