syafiqq02 committed on
Commit
8cfd27b
·
1 Parent(s): 2730879
Files changed (4) hide show
  1. Dockerfile +0 -1
  2. app/__pycache__/main.cpython-39.pyc +0 -0
  3. app/main.py +5 -22
  4. app/nltk.py +0 -1
Dockerfile CHANGED
@@ -6,7 +6,6 @@ COPY app /code/app
6
 
7
  RUN pip install --upgrade pip
8
  RUN pip install -r /code/app/requirements.txt
9
- RUN python /code/app/nltk.py
10
 
11
  EXPOSE 7860
12
 
 
6
 
7
  RUN pip install --upgrade pip
8
  RUN pip install -r /code/app/requirements.txt
 
9
 
10
  EXPOSE 7860
11
 
app/__pycache__/main.cpython-39.pyc ADDED
Binary file (3.78 kB). View file
 
app/main.py CHANGED
@@ -3,11 +3,9 @@ import nltk
3
  import uvicorn
4
  from fastapi import FastAPI, File, UploadFile
5
  from pydantic import BaseModel
6
- from sumy.parsers.plaintext import PlaintextParser
7
- from sumy.nlp.tokenizers import Tokenizer
8
- from sumy.summarizers.lsa import LsaSummarizer
9
  from groq import Groq
10
 
 
11
  GROQ_API_KEY = "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"
12
  groq_client = Groq(api_key=GROQ_API_KEY)
13
 
@@ -16,7 +14,7 @@ def save_to_file(content: str, filename: str) -> str:
16
  file.write(content)
17
  return filename
18
 
19
- def transcribe_and_summarize(audio_path: str):
20
  with open(audio_path, "rb") as audio_file:
21
  response = groq_client.audio.transcriptions.create(
22
  model="whisper-large-v3",
@@ -24,19 +22,7 @@ def transcribe_and_summarize(audio_path: str):
24
  response_format="text"
25
  )
26
  transcription = response
27
-
28
- parser = PlaintextParser.from_string(transcription, Tokenizer("english"))
29
- summarizer = LsaSummarizer()
30
- summary_sentences = summarizer(parser.document, 5)
31
- summarized_text = " ".join(str(s) for s in summary_sentences)
32
-
33
- original_tokens = len(nltk.word_tokenize(transcription))
34
- summarized_tokens = len(nltk.word_tokenize(summarized_text))
35
- token_info = f"Asli: {original_tokens} token | Ringkasan: {summarized_tokens} token"
36
-
37
- summarized_file = save_to_file(summarized_text, "summarized_transcription.txt")
38
-
39
- return transcription, summarized_text, summarized_file, audio_path, token_info
40
 
41
  def summarize_soap(dialogue: str):
42
  prompt_soap = f"""
@@ -97,7 +83,7 @@ async def full_process(audio: UploadFile = File(...)):
97
  with open(temp_audio_path, "wb") as f:
98
  f.write(await audio.read())
99
 
100
- transcription, summarized_text, summarized_file, audio_path, token_info = transcribe_and_summarize(temp_audio_path)
101
 
102
  soap_content, soap_file = generate_soap(transcription)
103
  tags_content, tags_file = generate_tags(transcription)
@@ -106,10 +92,7 @@ async def full_process(audio: UploadFile = File(...)):
106
 
107
  return {
108
  "transcription": transcription,
109
- "summarized_text": summarized_text,
110
- "summarized_file": summarized_file,
111
- "audio_path": audio_path,
112
- "token_info": token_info,
113
  "soap_content": soap_content,
114
  "soap_file": soap_file,
115
  "tags_content": tags_content,
 
3
  import uvicorn
4
  from fastapi import FastAPI, File, UploadFile
5
  from pydantic import BaseModel
 
 
 
6
  from groq import Groq
7
 
8
+
9
  GROQ_API_KEY = "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"
10
  groq_client = Groq(api_key=GROQ_API_KEY)
11
 
 
14
  file.write(content)
15
  return filename
16
 
17
+ def transcribe_audio(audio_path: str):
18
  with open(audio_path, "rb") as audio_file:
19
  response = groq_client.audio.transcriptions.create(
20
  model="whisper-large-v3",
 
22
  response_format="text"
23
  )
24
  transcription = response
25
+ return transcription
 
 
 
 
 
 
 
 
 
 
 
 
26
 
27
  def summarize_soap(dialogue: str):
28
  prompt_soap = f"""
 
83
  with open(temp_audio_path, "wb") as f:
84
  f.write(await audio.read())
85
 
86
+ transcription = transcribe_audio(temp_audio_path)
87
 
88
  soap_content, soap_file = generate_soap(transcription)
89
  tags_content, tags_file = generate_tags(transcription)
 
92
 
93
  return {
94
  "transcription": transcription,
95
+ "audio_path": temp_audio_path,
 
 
 
96
  "soap_content": soap_content,
97
  "soap_file": soap_file,
98
  "tags_content": tags_content,
app/nltk.py DELETED
@@ -1 +0,0 @@
1
- import nltk