Spaces:

Sabbah13
/

text_transcribation_diarization_and_summarization

Paused

Sabbah13 committed on Jul 5, 2024

Commit

e8a4c9c

verified ·

1 Parent(s): 124547b

Added retrieval of the token count for processing and removed the Whisper settings from the sidebar (now read from environment variables)

Files changed (1) hide show

app.py CHANGED Viewed

@@ -3,15 +3,15 @@ import streamlit as st
 import whisperx
 import torch
 from utils import convert_segments_object_to_text, check_password
-from gigiachat_requests import get_access_token, get_completion_from_gigachat
 if check_password():
     st.title('Audio Transcription App')
     st.sidebar.title("Settings")
-    # Sidebar inputs
-    device = st.sidebar.selectbox("Device", ["cpu", "cuda"], index=1)
-    batch_size = st.sidebar.number_input("Batch Size", min_value=1, value=16)
-    compute_type = st.sidebar.selectbox("Compute Type", ["float16", "int8"], index=0)
     initial_giga_base_prompt = os.getenv('GIGA_BASE_PROMPT')
     initial_giga_processing_prompt = os.getenv('GIGA_PROCCESS_PROMPT')
@@ -58,8 +58,9 @@ if check_password():
         if (enable_summarization):
             with st.spinner('Обрабатываем транскрибацию...'):
-                transcript = get_completion_from_gigachat(giga_processing_prompt + transcript, 32768, access_token)
                 st.write("Результат обработки:")
                 st.text(transcript)

 import whisperx
 import torch
 from utils import convert_segments_object_to_text, check_password
+from gigiachat_requests import get_access_token, get_completion_from_gigachat, get_number_of_tokens
 if check_password():
     st.title('Audio Transcription App')
     st.sidebar.title("Settings")
+    device = os.getenv('DEVICE')
+    batch_size = int(os.getenv('BATCH_SIZE'))
+    compute_type = os.getenv('COMPUTE_TYPE')
     initial_giga_base_prompt = os.getenv('GIGA_BASE_PROMPT')
     initial_giga_processing_prompt = os.getenv('GIGA_PROCCESS_PROMPT')
         if (enable_summarization):
             with st.spinner('Обрабатываем транскрибацию...'):
+                number_of_tokens = get_number_of_tokens(transcript, access_token)
+                transcript = get_completion_from_gigachat(giga_processing_prompt + transcript, number_of_tokens, access_token)
                 st.write("Результат обработки:")
                 st.text(transcript)