syafiqq02 committed on
Commit
10e3cf1
·
1 Parent(s): 8d0e8af
Files changed (1) hide show
  1. app.py +89 -27
app.py CHANGED
@@ -6,68 +6,130 @@ from sumy.parsers.plaintext import PlaintextParser
6
  from sumy.nlp.tokenizers import Tokenizer
7
  from sumy.summarizers.lsa import LsaSummarizer
8
 
9
# Download the NLTK tokenizer data if it is not already available.
nltk.download('punkt')
nltk.download('punkt_tab')

# Groq API key.
# SECURITY: the key used to be hard-coded on this line, which publishes it to
# anyone who can read the repository. It is now read from the environment
# (for example a Hugging Face Space secret named GROQ_API_KEY). The key that
# was committed must be treated as leaked and revoked in the Groq console.
import os  # imported here so this block is self-contained

GROQ_API_KEY = os.environ.get("GROQ_API_KEY")
if not GROQ_API_KEY:
    raise RuntimeError(
        "GROQ_API_KEY is not set. Export it as an environment variable "
        "(or configure it as a secret) before starting the app."
    )

# Initialize the Groq API client.
groq_client = Groq(api_key=GROQ_API_KEY)
18
 
19
def save_to_file(content, filename):
    """Write *content* to *filename* as UTF-8 text and return *filename*.

    Args:
        content: Text to write. An existing file at ``filename`` is
            overwritten.
        filename: Destination path. If it contains a directory part that
            does not exist yet, that directory is created first.

    Returns:
        The ``filename`` argument, unchanged, so the caller can pass it on
        (for example to a download component).
    """
    import os  # local import keeps this helper self-contained

    parent = os.path.dirname(filename)
    if parent:
        # open() does not create missing directories on its own.
        os.makedirs(parent, exist_ok=True)
    with open(filename, 'w', encoding='utf-8') as file:
        file.write(content)
    return filename
24
 
25
def transcribe_and_summarize(audio_path):
    """Transcribe a recording with Groq Whisper, then summarize it with LSA.

    The audio file is sent to Groq's transcription endpoint using the
    ``whisper-large-v3`` model. The returned text is summarized with Sumy's
    ``LsaSummarizer`` (five sentences requested) and the summary is written
    to ``summarized_transcription.txt``.

    Args:
        audio_path: Path of the audio file to transcribe.

    Returns:
        A 4-tuple ``(summarized_text, summarized_file, audio_path,
        token_info)``. ``token_info`` is a single string holding the token
        counts of the transcript and of the summary. The function used to
        return the two counts as separate values, i.e. five values for the
        four output components wired to the button, so the summary count
        never reached the token box.
    """
    with open(audio_path, "rb") as audio_file:
        response = groq_client.audio.transcriptions.create(
            model="whisper-large-v3",
            file=audio_file,
            response_format="text"
        )
    transcription = response  # transcription text

    # Summarize the transcript with Sumy LSA (five sentences).
    # NOTE(review): the tokenizer is configured for English while the UI is
    # Indonesian -- confirm this is the intended sentence splitter.
    parser = PlaintextParser.from_string(transcription, Tokenizer("english"))
    summarizer = LsaSummarizer()
    summary_sentences = summarizer(parser.document, 5)

    # Join the selected sentences into a single text.
    summarized_text = " ".join(str(sentence) for sentence in summary_sentences)

    # Token counts of the transcript and of the summary, merged into one
    # string so the return value has exactly one entry per output component.
    original_tokens = len(nltk.word_tokenize(transcription))
    summarized_tokens = len(nltk.word_tokenize(summarized_text))
    token_info = f"Asli: {original_tokens} token | Ringkasan: {summarized_tokens} token"

    # Persist the summary so it can be offered as a download.
    summarized_file = save_to_file(summarized_text, 'summarized_transcription.txt')

    return summarized_text, summarized_file, audio_path, token_info
 
 
 
 
 
 
 
 
 
51
 
52
# Gradio interface: one row with the recorder, button and result boxes in the
# left column and the two download slots in the right column.
# NOTE(review): the nesting below is reconstructed from a rendering that lost
# its indentation -- confirm the column layout against the real file.
with gr.Blocks(title="AI-based Medical Transcription & Summarization with Groq API") as app:
    gr.Markdown("## Medical Transcription & Summarization with Groq API")

    with gr.Row():
        with gr.Column():
            # NOTE(review): "microphone" is passed positionally; in current
            # Gradio releases the first positional parameter of gr.Audio
            # appears to be `value`, not the input source -- confirm against
            # the installed Gradio version.
            audio_input = gr.Audio("microphone", type="filepath", label="🎙️ Rekam Suara")
            transcribe_button = gr.Button("🎧 Transkripsi & Ringkas dengan Whisper Large V3 (LSA 5 Kalimat)")
            summarize_box = gr.Textbox(label="📄 Ringkasan Transkripsi", lines=10, interactive=False)
            # The closing parenthesis was missing from this label.
            token_count_box = gr.Textbox(label="🔢 Info Token (Asli & Ringkasan)", interactive=False)

        with gr.Column():
            download_audio = gr.File(label="⬇️ Download Rekaman")
            download_summary = gr.File(label="⬇️ Download Ringkasan")

    # One returned value per output component, in this order.
    transcribe_button.click(
        transcribe_and_summarize,
        inputs=[audio_input],
        outputs=[summarize_box, download_summary, download_audio, token_count_box]
    )

app.launch(share=True)
 
6
  from sumy.nlp.tokenizers import Tokenizer
7
  from sumy.summarizers.lsa import LsaSummarizer
8
 
9
# Download the NLTK tokenizer data.
nltk.download('punkt')
nltk.download('punkt_tab')

# Groq API key.
# SECURITY: the key used to be hard-coded on this line, which publishes it to
# anyone who can read the repository. It is now read from the environment
# (for example a Hugging Face Space secret named GROQ_API_KEY). The key that
# was committed must be treated as leaked and revoked in the Groq console.
import os  # imported here so this block is self-contained

GROQ_API_KEY = os.environ.get("GROQ_API_KEY")
if not GROQ_API_KEY:
    raise RuntimeError(
        "GROQ_API_KEY is not set. Export it as an environment variable "
        "(or configure it as a secret) before starting the app."
    )

groq_client = Groq(api_key=GROQ_API_KEY)
16
 
17
def save_to_file(content, filename):
    """Write *content* to *filename* as UTF-8 text and return *filename*.

    Args:
        content: Text to write. An existing file at ``filename`` is
            overwritten.
        filename: Destination path. If it contains a directory part that
            does not exist yet, that directory is created first.

    Returns:
        The ``filename`` argument, unchanged, so the caller can pass it on
        (for example to a download component).
    """
    import os  # local import keeps this helper self-contained

    parent = os.path.dirname(filename)
    if parent:
        # open() does not create missing directories on its own.
        os.makedirs(parent, exist_ok=True)
    with open(filename, 'w', encoding='utf-8') as file:
        file.write(content)
    return filename
21
 
22
# Chat model used for both the SOAP note and the tag extraction.
# NOTE(review): confirm this model ID is still served by Groq; older Llama 3
# IDs have been retired in favour of newer ones.
_CHAT_MODEL = "llama3-8b-8192"

# Prompt templates. They are filled with str.format, so the transcript is
# inserted verbatim and never re-interpreted as a format string.
_SOAP_PROMPT_TEMPLATE = """
Anda adalah asisten medis yang membantu dokter dalam menyusun catatan SOAP berdasarkan percakapan dokter dan pasien.
Ringkaskan dalam bentuk paragraf tanpa adanya bullet point dan gunakan bahasa Indonesia.
Harap buat ringkasan dalam format berikut:
Subjective:
Objective:
Assessment:
Plan:

### Percakapan:
{transcription}

Tolong jangan tambahkan informasi tambahan selain yang berkaitan dengan diagnosis, obat, hasil lab, dan radiologi.
"""

_TAGS_PROMPT_TEMPLATE = """
Identifikasi dan berikan luaran dalam bahasa Indonesia tags berikut dari percakapan dengan format:
Diagnosis:
Obat:
Hasil Lab:
Radiologi:

### Percakapan:
{transcription}

Tolong jangan tambahkan informasi tambahan selain yang berkaitan dengan diagnosis, obat, hasil lab, dan radiologi.
"""


def _ask_llm(prompt):
    """Send *prompt* as a single user message and return the reply text."""
    completion = groq_client.chat.completions.create(
        model=_CHAT_MODEL,
        messages=[{"role": "user", "content": prompt}]
    )
    # Fall back to an empty string so the result can always be written to a
    # file, even if the API returns no message content.
    return completion.choices[0].message.content or ""


def transcribe_and_summarize_generate(audio_path):
    """Run the full pipeline: transcription, LSA summary, SOAP note and tags.

    Steps:
      1. Transcribe the audio with Groq (``whisper-large-v3``).
      2. Summarize the transcript with Sumy's ``LsaSummarizer`` (five
         sentences requested).
      3. Count NLTK word tokens in the transcript and in the summary.
      4. Ask the chat model for a SOAP note and for medical tags.
      5. Write the summary, SOAP note and tags to text files.

    Args:
        audio_path: Path of the recorded audio file. ``None`` or an empty
            value (nothing recorded) is rejected with a user-visible error.

    Returns:
        An 8-tuple, one entry per output component wired to the button:
        ``(summarized_text, soap_content, tags_content, token_info,
        summarized_file, soap_file, tags_file, audio_path)``.

    Raises:
        gr.Error: If no audio was supplied or the transcript is empty.
    """
    # Without a recording, open() would fail with an opaque TypeError.
    if not audio_path:
        raise gr.Error("Tidak ada audio. Silakan rekam percakapan terlebih dahulu.")

    # Transcription
    with open(audio_path, "rb") as audio_file:
        response = groq_client.audio.transcriptions.create(
            model="whisper-large-v3",
            file=audio_file,
            response_format="text"
        )
    transcription = response

    # An empty transcript would make the chat model invent a SOAP note.
    if not transcription.strip():
        raise gr.Error("Transkripsi kosong. Pastikan rekaman berisi percakapan.")

    # LSA summarization
    # NOTE(review): the tokenizer is configured for English while the prompts
    # and UI are Indonesian -- confirm this is the intended sentence splitter.
    parser = PlaintextParser.from_string(transcription, Tokenizer("english"))
    summarizer = LsaSummarizer()
    summary_sentences = summarizer(parser.document, 5)
    summarized_text = " ".join(str(sentence) for sentence in summary_sentences)

    # Token info
    original_tokens = len(nltk.word_tokenize(transcription))
    summarized_tokens = len(nltk.word_tokenize(summarized_text))
    token_info = f"Asli: {original_tokens} token | Ringkasan: {summarized_tokens} token"

    # SOAP note and medical tags, both generated from the full transcript.
    soap_content = _ask_llm(_SOAP_PROMPT_TEMPLATE.format(transcription=transcription))
    tags_content = _ask_llm(_TAGS_PROMPT_TEMPLATE.format(transcription=transcription))

    # Save files
    # NOTE(review): these fixed file names are shared by every request, so
    # overlapping requests may overwrite each other's results -- consider
    # per-request paths if the app serves several users at once.
    summarized_file = save_to_file(summarized_text, 'summarized_transcription.txt')
    soap_file = save_to_file(soap_content, 'soap_summary.txt')
    tags_file = save_to_file(tags_content, 'medical_tags.txt')

    return (
        summarized_text,
        soap_content,
        tags_content,
        token_info,
        summarized_file,
        soap_file,
        tags_file,
        audio_path
    )
98
 
99
# Gradio UI: recorder and button on the left, the four result boxes on the
# right, and a row of download slots.
# NOTE(review): the nesting below is reconstructed from a rendering that lost
# its indentation; in particular the download row is assumed to sit at the
# top level of the Blocks container -- confirm against the real file.
with gr.Blocks(title="SOAP AI: Transkripsi dan Ringkasan Medis Otomatis") as app:
    gr.Markdown("## 🧠 SOAP AI - Transkripsi, Ringkasan, dan Deteksi Medis Otomatis")

    with gr.Row():
        with gr.Column():
            # NOTE(review): "microphone" is passed positionally; in current
            # Gradio releases the first positional parameter of gr.Audio
            # appears to be `value`, not the input source -- confirm against
            # the installed Gradio version.
            audio_input = gr.Audio("microphone", type="filepath", label="🎙️ Rekam Percakapan")
            transcribe_button = gr.Button("🩺 Jalankan Proses Lengkap (Transkripsi + Ringkasan + SOAP + Tags)")

        with gr.Column():
            summarize_box = gr.Textbox(label="📄 Ringkasan LSA (5 Kalimat)", lines=5, interactive=False)
            soap_box = gr.Textbox(label="📋 Ringkasan SOAP", lines=8, interactive=False)
            tags_box = gr.Textbox(label="🏷️ Medical Tags", lines=6, interactive=False)
            token_box = gr.Textbox(label="🔢 Token Info", interactive=False)

    with gr.Row():
        download_summary = gr.File(label="⬇️ Download Ringkasan LSA")
        download_soap = gr.File(label="⬇️ Download SOAP")
        download_tags = gr.File(label="⬇️ Download Tags")
        download_audio = gr.File(label="⬇️ Download Audio")

    # The order here must match the tuple returned by
    # transcribe_and_summarize_generate: three texts, the token line, then
    # the three generated files and the original recording.
    transcribe_button.click(
        transcribe_and_summarize_generate,
        inputs=[audio_input],
        outputs=[
            summarize_box, soap_box, tags_box, token_box,
            download_summary, download_soap, download_tags, download_audio,
        ],
    )

app.launch(share=True)