Spaces:
Build error
Build error
update gpt3write
Browse files
app.py
CHANGED
|
@@ -23,9 +23,9 @@ from openai import OpenAI
|
|
| 23 |
from concurrent.futures import ThreadPoolExecutor
|
| 24 |
import tiktoken
|
| 25 |
|
| 26 |
-
usemodelname='gpt-4-0125-preview'
|
| 27 |
|
| 28 |
-
def
|
|
|
|
| 29 |
|
| 30 |
response = openaiobj.chat.completions.create(
|
| 31 |
#model="gpt-3.5-turbo",
|
|
@@ -34,7 +34,7 @@ def call_openai_api(openaiobj,transcription,usemodelname):
|
|
| 34 |
messages=[
|
| 35 |
{
|
| 36 |
"role": "system",
|
| 37 |
-
"content": "
|
| 38 |
},
|
| 39 |
{
|
| 40 |
"role": "user",
|
|
@@ -43,8 +43,8 @@ def call_openai_api(openaiobj,transcription,usemodelname):
|
|
| 43 |
]
|
| 44 |
)
|
| 45 |
return response.choices[0].message.content
|
| 46 |
-
def call_openai_summary(openaiobj,transcription):
|
| 47 |
-
|
| 48 |
response = openaiobj.chat.completions.create(
|
| 49 |
#model="gpt-3.5-turbo",
|
| 50 |
model=usemodelname,
|
|
@@ -52,7 +52,7 @@ def call_openai_summary(openaiobj,transcription):
|
|
| 52 |
messages=[
|
| 53 |
{
|
| 54 |
"role": "system",
|
| 55 |
-
"content": "
|
| 56 |
},
|
| 57 |
{
|
| 58 |
"role": "user",
|
|
@@ -70,7 +70,7 @@ def call_openai_summaryall(openaiobj,transcription,usemodelname):
|
|
| 70 |
messages=[
|
| 71 |
{
|
| 72 |
"role": "system",
|
| 73 |
-
"content": "
|
| 74 |
},
|
| 75 |
{
|
| 76 |
"role": "user",
|
|
@@ -83,16 +83,17 @@ def call_openai_summaryall(openaiobj,transcription,usemodelname):
|
|
| 83 |
|
| 84 |
|
| 85 |
|
| 86 |
-
def split_into_chunks(text, tokens=15900):
|
| 87 |
#encoding = tiktoken.encoding_for_model('gpt-3.5-turbo')
|
| 88 |
-
encoding = tiktoken.encoding_for_model(
|
| 89 |
words = encoding.encode(text)
|
| 90 |
chunks = []
|
| 91 |
for i in range(0, len(words), tokens):
|
| 92 |
chunks.append(' '.join(encoding.decode(words[i:i + tokens])))
|
| 93 |
return chunks
|
| 94 |
|
| 95 |
-
def
|
|
|
|
| 96 |
# openaiobj = OpenAI(
|
| 97 |
# # This is the default and can be omitted
|
| 98 |
|
|
@@ -108,21 +109,21 @@ def process_chunks(openaikeystr,inputtext,LLMmodel):
|
|
| 108 |
text = inputtext
|
| 109 |
#openaikey.set_key(openaikeystr)
|
| 110 |
#print('process_chunk',openaikey.get_key())
|
| 111 |
-
chunks = split_into_chunks(text)
|
| 112 |
-
|
| 113 |
i=1
|
| 114 |
if len(chunks)>1:
|
| 115 |
-
|
| 116 |
for chunk in chunks:
|
| 117 |
|
| 118 |
-
response=response+'第' +str(i)+'段\n'+
|
| 119 |
i=i+1
|
| 120 |
-
finalresponse=response+'\n\n 這是根據以上分段會議紀錄彙編如下 \n\n' +
|
| 121 |
# response=response+call_openai_summary(openaiobj,chunk)
|
| 122 |
|
| 123 |
|
| 124 |
else:
|
| 125 |
-
finalresponse=
|
| 126 |
return finalresponse
|
| 127 |
# # Processes chunks in parallel
|
| 128 |
# with ThreadPoolExecutor() as executor:
|
|
@@ -234,6 +235,8 @@ file_transcribe = gr.Interface(
|
|
| 234 |
allow_flagging="never",
|
| 235 |
)
|
| 236 |
import google.generativeai as genai
|
|
|
|
|
|
|
| 237 |
def gpt4write(openaikeystr,transcribe_text,LLMmodel):
|
| 238 |
# openaiobj = OpenAI(
|
| 239 |
# # This is the default and can be omitted
|
|
@@ -253,7 +256,7 @@ def gpt4write(openaikeystr,transcribe_text,LLMmodel):
|
|
| 253 |
#chunks = split_into_chunks(text)
|
| 254 |
#response='這是分段會議紀錄結果\n\n'
|
| 255 |
|
| 256 |
-
finalresponse=
|
| 257 |
# response=response+call_openai_summary(openaiobj,chunk)
|
| 258 |
return finalresponse
|
| 259 |
|
|
@@ -274,7 +277,7 @@ def writenotes( LLMmodel,apikeystr,inputscript):
|
|
| 274 |
if len(inputscript)>10: #有資料表示不是來自語音辨識結果
|
| 275 |
transcribe_text=inputscript
|
| 276 |
if LLMmodel=="gpt-3.5-turbo":
|
| 277 |
-
ainotestext=
|
| 278 |
elif LLMmodel=="gpt-4-0125-preview":
|
| 279 |
ainotestext=gpt4write(apikeystr,transcribe_text,LLMmodel)
|
| 280 |
elif LLMmodel=='gemini':
|
|
|
|
| 23 |
from concurrent.futures import ThreadPoolExecutor
|
| 24 |
import tiktoken
|
| 25 |
|
|
|
|
| 26 |
|
| 27 |
+
def call_openai_makenote(openaiobj,transcription,usemodelname):
|
| 28 |
+
## 直接做會議紀錄,GPT4或GPT 3.5但小於16K
|
| 29 |
|
| 30 |
response = openaiobj.chat.completions.create(
|
| 31 |
#model="gpt-3.5-turbo",
|
|
|
|
| 34 |
messages=[
|
| 35 |
{
|
| 36 |
"role": "system",
|
| 37 |
+
"content": "你是專業的會議紀錄製作員,請根據由語音辨識軟體將會議錄音所轉錄的逐字稿,也請注意逐字稿可能有錯,請先做校正,討論內容細節請略過,請列出會議決議,並要用比較正式及容易閱讀的寫法,避免口語化"
|
| 38 |
},
|
| 39 |
{
|
| 40 |
"role": "user",
|
|
|
|
| 43 |
]
|
| 44 |
)
|
| 45 |
return response.choices[0].message.content
|
| 46 |
+
def call_openai_summary(openaiobj,transcription,usemodelname):
|
| 47 |
+
## 分段摘要
|
| 48 |
response = openaiobj.chat.completions.create(
|
| 49 |
#model="gpt-3.5-turbo",
|
| 50 |
model=usemodelname,
|
|
|
|
| 52 |
messages=[
|
| 53 |
{
|
| 54 |
"role": "system",
|
| 55 |
+
"content": "你是專業的會議紀錄製作員,請根據由語音辨識軟體將會議錄音所轉錄的逐字稿,也請注意逐字稿可能有錯,請先校正,再摘要會議重點內容"
|
| 56 |
},
|
| 57 |
{
|
| 58 |
"role": "user",
|
|
|
|
| 70 |
messages=[
|
| 71 |
{
|
| 72 |
"role": "system",
|
| 73 |
+
"content": "你是專業的會議紀錄製作員,請根據分段的會議摘要,彙整成正式會議紀錄,並要用比較正式及容易閱讀的寫法,避免口語化"
|
| 74 |
},
|
| 75 |
{
|
| 76 |
"role": "user",
|
|
|
|
| 83 |
|
| 84 |
|
| 85 |
|
| 86 |
+
def split_into_chunks(text, LLMmodel, tokens=15900):
    """Split *text* into pieces of at most *tokens* tokens each.

    The text is tokenized with the tiktoken encoding registered for
    *LLMmodel*, the token list is cut into consecutive windows of
    *tokens* tokens, and each window is decoded back to a string.

    Args:
        text: The full text to split.
        LLMmodel: Model name passed to ``tiktoken.encoding_for_model``.
        tokens: Maximum number of tokens per chunk.

    Returns:
        A list of strings, one per window, in original order. Empty when
        *text* encodes to no tokens.
    """
    encoding = tiktoken.encoding_for_model(LLMmodel)
    token_ids = encoding.encode(text)
    # decode() already returns a str. The previous ' '.join(...) iterated
    # over that string and inserted a space between every character, which
    # corrupted the text and pushed each chunk past the token budget.
    # NOTE(review): a window edge can fall inside a multi-token character;
    # confirm whether boundary characters need special handling.
    return [
        encoding.decode(token_ids[start:start + tokens])
        for start in range(0, len(token_ids), tokens)
    ]
|
| 94 |
|
| 95 |
+
def gpt3write(openaikeystr,inputtext,LLMmodel):
|
| 96 |
+
|
| 97 |
# openaiobj = OpenAI(
|
| 98 |
# # This is the default and can be omitted
|
| 99 |
|
|
|
|
| 109 |
text = inputtext
|
| 110 |
#openaikey.set_key(openaikeystr)
|
| 111 |
#print('process_chunk',openaikey.get_key())
|
| 112 |
+
chunks = split_into_chunks(text,LLMmodel)
|
| 113 |
+
|
| 114 |
i=1
|
| 115 |
if len(chunks)>1:
|
| 116 |
+
response='這是分段會議紀錄摘要\n\n'
|
| 117 |
for chunk in chunks:
|
| 118 |
|
| 119 |
+
response=response+'第' +str(i)+'段\n'+call_openai_summary(openaiobj,chunk,LLMmodel)+'\n\n'
|
| 120 |
i=i+1
|
| 121 |
+
finalresponse=response+'\n\n 這是根據以上分段會議紀錄彙編如下 \n\n' +call_openai_summaryall(openaiobj,response,LLMmodel)
|
| 122 |
# response=response+call_openai_summary(openaiobj,chunk)
|
| 123 |
|
| 124 |
|
| 125 |
else:
|
| 126 |
+
finalresponse=call_openai_makenote(openaiobj,inputtext,LLMmodel)
|
| 127 |
return finalresponse
|
| 128 |
# # Processes chunks in parallel
|
| 129 |
# with ThreadPoolExecutor() as executor:
|
|
|
|
| 235 |
allow_flagging="never",
|
| 236 |
)
|
| 237 |
import google.generativeai as genai
|
| 238 |
+
|
| 239 |
+
|
| 240 |
def gpt4write(openaikeystr,transcribe_text,LLMmodel):
|
| 241 |
# openaiobj = OpenAI(
|
| 242 |
# # This is the default and can be omitted
|
|
|
|
| 256 |
#chunks = split_into_chunks(text)
|
| 257 |
#response='這是分段會議紀錄結果\n\n'
|
| 258 |
|
| 259 |
+
finalresponse=call_openai_makenote(openaiobj,transcribe_text,LLMmodel)
|
| 260 |
# response=response+call_openai_summary(openaiobj,chunk)
|
| 261 |
return finalresponse
|
| 262 |
|
|
|
|
| 277 |
if len(inputscript)>10: #有資料表示不是來自語音辨識結果
|
| 278 |
transcribe_text=inputscript
|
| 279 |
if LLMmodel=="gpt-3.5-turbo":
|
| 280 |
+
ainotestext=gpt3write(apikeystr,transcribe_text,LLMmodel)
|
| 281 |
elif LLMmodel=="gpt-4-0125-preview":
|
| 282 |
ainotestext=gpt4write(apikeystr,transcribe_text,LLMmodel)
|
| 283 |
elif LLMmodel=='gemini':
|