syafiqq02 commited on
Commit
9c86826
·
1 Parent(s): 9133b7c
Files changed (4) hide show
  1. Dockerfile +12 -0
  2. app/main.py +137 -0
  3. app/nltk.py +4 -0
  4. app/requirements.txt +8 -0
Dockerfile ADDED
@@ -0,0 +1,12 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
1
FROM python:3.10-slim

WORKDIR /code

COPY app /code/app

# Single layer; --no-cache-dir keeps the image slim (pip's cache is dead
# weight inside a container image).
RUN pip install --no-cache-dir --upgrade pip && \
    pip install --no-cache-dir -r /code/app/requirements.txt

EXPOSE 7860

# BUG FIX: the original CMD ran `python app/prepare.py`, but no prepare.py is
# added in this commit — the NLTK bootstrap script is app/nltk.py (confirm no
# prepare.py exists in the parent commit). Running it with `-m app.nltk` also
# keeps the script's directory off sys.path[0], so `import nltk` resolves to
# the installed package rather than the script shadowing it.
CMD python -m app.nltk && uvicorn app.main:app --host 0.0.0.0 --port 7860
app/main.py ADDED
@@ -0,0 +1,137 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
import os
import nltk
import uvicorn
from fastapi import FastAPI, File, UploadFile
from pydantic import BaseModel
from sumy.parsers.plaintext import PlaintextParser
from sumy.nlp.tokenizers import Tokenizer
from sumy.summarizers.lsa import LsaSummarizer
from groq import Groq

# SECURITY FIX: a live Groq API key was hard-coded here and committed to the
# repository. Credentials must come from the environment, never from source;
# the previously committed key must be revoked/rotated regardless of this fix.
GROQ_API_KEY = os.environ.get("GROQ_API_KEY")
if not GROQ_API_KEY:
    # Fail fast at startup rather than on the first API call.
    raise RuntimeError("GROQ_API_KEY environment variable is not set")

# Shared client used by all transcription / chat-completion helpers below.
groq_client = Groq(api_key=GROQ_API_KEY)
13
+
14
def save_to_file(content: str, filename: str) -> str:
    """Persist *content* to *filename* as UTF-8 text and return the path."""
    with open(filename, "w", encoding="utf-8") as out:
        out.write(content)
    return filename
18
+
19
def transcribe_and_summarize(audio_path: str):
    """Transcribe an audio file with Groq Whisper, then LSA-summarize the text.

    Returns a 5-tuple: (transcription, summarized_text, summarized_file,
    audio_path, token_info), where token_info is a human-readable token-count
    comparison (Indonesian labels).
    """
    # Plain-text transcription of the audio via the Groq Whisper endpoint.
    with open(audio_path, "rb") as audio_file:
        transcription = groq_client.audio.transcriptions.create(
            model="whisper-large-v3",
            file=audio_file,
            response_format="text",
        )

    # Extractive 5-sentence summary (sumy LSA).
    document = PlaintextParser.from_string(transcription, Tokenizer("english")).document
    top_sentences = LsaSummarizer()(document, 5)
    summarized_text = " ".join(map(str, top_sentences))

    # Report word-token counts before and after summarization.
    n_original = len(nltk.word_tokenize(transcription))
    n_summary = len(nltk.word_tokenize(summarized_text))
    token_info = f"Asli: {n_original} token | Ringkasan: {n_summary} token"

    summarized_file = save_to_file(summarized_text, "summarized_transcription.txt")
    return transcription, summarized_text, summarized_file, audio_path, token_info
40
+
41
def summarize_soap(dialogue: str):
    """Ask the Groq LLM for an Indonesian SOAP-format note for *dialogue*.

    Returns the model's text response (Subjective / Objective / Assessment /
    Plan paragraphs, per the prompt).
    """
    # Prompt text is part of the runtime contract — kept verbatim.
    prompt = f"""
    Anda adalah asisten medis yang membantu dokter dalam menyusun catatan SOAP berdasarkan percakapan dokter dan pasien.
    Ringkaskan dalam bentuk paragraf tanpa adanya bullet point dan gunakan bahasa Indonesia.
    Harap buat ringkasan dalam format berikut:
    Subjective:
    Objective:
    Assessment:
    Plan:

    ### Percakapan:
    {dialogue}

    Tolong jangan tambahkan informasi tambahan selain yang berkaitan dengan diagnosis, obat, hasil lab, dan radiologi.
    """
    completion = groq_client.chat.completions.create(
        model="llama3-8b-8192",
        messages=[{"role": "user", "content": prompt}],
    )
    return completion.choices[0].message.content
61
+
62
def generate_soap(transcription: str):
    """Build a SOAP note from *transcription* and persist it to disk.

    Returns (soap_text, path_of_saved_file).
    """
    soap_text = summarize_soap(transcription)
    return soap_text, save_to_file(soap_text, "soap_summary.txt")
66
+
67
def detect_medical_tags(dialogue: str):
    """Extract medical tags (diagnosis, drugs, lab, radiology) from *dialogue*.

    Uses the Groq LLM with a fixed Indonesian prompt; returns the raw model
    response text.
    """
    # Prompt text is part of the runtime contract — kept verbatim.
    prompt = f"""
    Identifikasi dan berikan luaran dalam bahasa Indonesia tags berikut dari percakapan dengan format:
    Diagnosis:
    Obat:
    Hasil Lab:
    Radiologi:

    ### Percakapan:
    {dialogue}

    Tolong jangan tambahkan informasi tambahan selain yang berkaitan dengan diagnosis, obat, hasil lab, dan radiologi.
    """
    completion = groq_client.chat.completions.create(
        model="llama3-8b-8192",
        messages=[{"role": "user", "content": prompt}],
    )
    return completion.choices[0].message.content
85
+
86
def generate_tags(transcription: str):
    """Detect medical tags in *transcription* and persist them to disk.

    Returns (tags_text, path_of_saved_file).
    """
    tags_text = detect_medical_tags(transcription)
    return tags_text, save_to_file(tags_text, "medical_tags.txt")
90
+
91
app = FastAPI(title="Medical Transcription Pipeline (Groq API)")

@app.post("/full_process")
async def full_process(audio: UploadFile = File(...)):
    """Transcribe an uploaded audio file, summarize it, and derive SOAP + tags.

    The upload is spooled to a local temp file (the Groq client needs a real
    file handle), processed, and the temp file is removed afterwards.
    """
    # BUG FIX: the temp path was the literal f"temp_(unknown)" — an f-string
    # with no placeholder — so every request wrote to the same fixed file and
    # concurrent uploads clobbered each other. Embed the client-supplied name;
    # basename() strips any directory components a hostile client might send.
    filename = os.path.basename(audio.filename or "upload")
    temp_audio_path = f"temp_{filename}"
    with open(temp_audio_path, "wb") as f:
        f.write(await audio.read())

    try:
        transcription, summarized_text, summarized_file, audio_path, token_info = \
            transcribe_and_summarize(temp_audio_path)
        soap_content, soap_file = generate_soap(transcription)
        tags_content, tags_file = generate_tags(transcription)
    finally:
        # Clean up the temp file even when transcription/summarization raises
        # (the original only removed it on the success path).
        os.remove(temp_audio_path)

    return {
        "transcription": transcription,
        "summarized_text": summarized_text,
        "summarized_file": summarized_file,
        "audio_path": audio_path,
        "token_info": token_info,
        "soap_content": soap_content,
        "soap_file": soap_file,
        "tags_content": tags_content,
        "tags_file": tags_file
    }
118
+
119
class TranscriptionInput(BaseModel):
    """Request body for /soap_tags: an already-transcribed conversation."""
    # Raw doctor-patient dialogue text to be summarized into SOAP notes/tags.
    dialogue: str
121
+
122
@app.post("/soap_tags")
async def soap_tags(data: TranscriptionInput):
    """Produce a SOAP note and medical tags from a supplied dialogue text.

    Skips the transcription step of /full_process — the caller provides the
    transcript directly in the request body.
    """
    dialogue = data.dialogue
    soap_content, soap_file = generate_soap(dialogue)
    tags_content, tags_file = generate_tags(dialogue)
    return {
        "soap_content": soap_content,
        "soap_file": soap_file,
        "tags_content": tags_content,
        "tags_file": tags_file
    }
135
+
136
if __name__ == "__main__":
    # Local development entry point; the container instead starts uvicorn on
    # port 7860 via the Dockerfile CMD.
    # NOTE(review): "main:app" only resolves when run from inside app/ — from
    # the repo root it would need "app.main:app"; confirm the intended cwd.
    uvicorn.run("main:app", host="0.0.0.0", port=8000, reload=True)
app/nltk.py ADDED
@@ -0,0 +1,4 @@
 
 
 
 
 
1
"""One-shot bootstrap: download the NLTK tokenizer data the app needs.

NOTE(review): this file is named ``nltk.py``, which shadows the installed
``nltk`` package when executed directly (``python app/nltk.py``): the script's
own directory heads ``sys.path``, so ``import nltk`` re-imports this file and
``nltk.download`` does not exist, crashing with AttributeError. We drop the
script directory from ``sys.path`` before importing so direct execution works;
renaming the file (e.g. to ``prepare.py``) is the cleaner long-term fix.
"""
import os
import sys

# Remove this script's directory so `import nltk` finds the real package.
_HERE = os.path.dirname(os.path.abspath(__file__))
sys.path[:] = [p for p in sys.path if os.path.abspath(p or ".") != _HERE]

import nltk

nltk.download("punkt")      # Punkt sentence-tokenizer models
nltk.download("punkt_tab")  # punkt_tab resource (looked up by newer NLTK releases)
app/requirements.txt ADDED
@@ -0,0 +1,8 @@
 
 
 
 
 
 
 
 
 
1
+ fastapi==0.115.8
2
+ faster_whisper==1.1.1
3
+ huggingface_hub==0.28.1
4
+ soundfile==0.13.1
5
+ sumy==0.11.0
6
+ nltk==3.9.1
7
+ uvicorn==0.34.0
8
+ groq==0.18.0