Spaces:

ericbotti
/

transcript-notetaker

Runtime error

App Files Community

Eric Botti committed on Jul 8, 2023

Commit

e5d260a

1 Parent(s): 8aa24e3

created streamlit interface

Browse files

Files changed (3) hide show

app.py +24 -0
main.py +50 -40
requirements.txt +0 -0

app.py ADDED Viewed

	@@ -0,0 +1,24 @@

+# standard
+from io import StringIO
+# 3rd party
+import streamlit as st
+# local
+import main
+st.set_page_config(page_title='Transcript Notetaker', page_icon=':memo:', layout='wide')
+st.write("Hello World")
+upload = st.file_uploader("Transcript", type='.txt')
+take_notes = st.button("Create Notes")
+if take_notes and upload:
+    upload_stringio = StringIO(upload.getvalue().decode('UTF-8'))
+    notes = main.create_meeting_notes(upload_stringio)
+if notes:
+    st.download_button("Download Notes", notes, "notes.md")
+    st.markdown(notes)

main.py CHANGED Viewed

@@ -5,6 +5,7 @@ import time
 import re
 # 3rd party
 from langchain.llms import OpenAI
 from langchain import LLMChain
 from langchain.text_splitter import RecursiveCharacterTextSplitter
 from langchain import PromptTemplate
@@ -13,13 +14,31 @@ from langchain import PromptTemplate
 config = configparser.ConfigParser()
 config.read('config.ini')
-def load_transcript(path: str):
     # Google Meet Transcripts have a header which we don't want to be summarized
     header_lines = 5
-    with open(path, 'r') as input_file:
-        file_text = input_file.readlines()
     head = file_text[:header_lines]
     transcript = "".join(file_text[header_lines:])
@@ -27,35 +46,21 @@ def load_transcript(path: str):
     return head, transcript
-if __name__ == '__main__':
     # read config variables
-    if not os.getenv("OPENAI_API_KEY"):
-        os.environ["OPENAI_API_KEY"] = config['REQUIRED']['openai-api-key']
-    transcript_filepath = config['OPTIONAL']['transcript-filepath']
-    notes_filepath = config['OPTIONAL']['notes-filepath']
-    llm = OpenAI(temperature=0)
-    head, transcript = load_transcript(transcript_filepath)
     # split the transcript on the 5-min timestamps
-    regex_pattern = r"[0-9]{2}:[0-9]{2}:[0-9]{2}"
     five_min_chunks = re.split(regex_pattern, transcript)
     # create a textsplitter to subdivide those chunks into appropriately sized chunks.
-    text_splitter = RecursiveCharacterTextSplitter(chunk_size=2000, chunk_overlap=200)
-    # prompt
-    prompt = PromptTemplate(
-        template="Write a concise summary of the following: {transcript}",
-        input_variables=['transcript']
-    )
-    chain = LLMChain(
-        prompt=prompt,
-        llm=llm,
-        verbose=False
-    )
     # list the meeting time and the chunks associated with it
     timestamped_summaries = []
@@ -68,7 +73,7 @@ if __name__ == '__main__':
         sub_chunks = text_splitter.split_text(five_minutes_chunk)
         summaries = []
-        for j, chunk in enumerate(sub_chunks):
             summaries.append(chain.run(chunk))
             print(f"{timestamp}: Chunk {j}/{len(sub_chunks)}")
@@ -80,16 +85,21 @@ if __name__ == '__main__':
     first_line = re.split(r"[()]", head[0])
-    # Write summaries to file
-    with open(notes_filepath, 'w+') as f:
-        f.write(f"# {first_line[0]}\n")
-        f.write(f"{first_line[1]}\n")
-        f.write("## Attendees\n")
-        f.write(f"{head[2]}\n")
-        f.write('## Meeting Notes\n')
-        for timestamp, summaries in timestamped_summaries:
-            f.write(f"### {timestamp}\n")
-            for summary in summaries:
-                f.write(f"- {summary.strip()}\n")
-    print(f"Export to file {notes_filepath} completed")

 import re
 # 3rd party
 from langchain.llms import OpenAI
+from langchain.chat_models import ChatOpenAI
 from langchain import LLMChain
 from langchain.text_splitter import RecursiveCharacterTextSplitter
 from langchain import PromptTemplate
 config = configparser.ConfigParser()
 config.read('config.ini')
+# read config variables
+if not os.getenv("OPENAI_API_KEY"):
+    os.environ["OPENAI_API_KEY"] = config['REQUIRED']['openai-api-key']
+# LangChain Config
+# llm
+llm = OpenAI(temperature=0)
+# prompt
+prompt = PromptTemplate(
+    template="Write a concise summary of the following: {transcript}",
+    input_variables=['transcript']
+)
+# chain
+chain = LLMChain(
+    prompt=prompt,
+    llm=llm,
+    verbose=False
+)
+def load_transcript(input_file):
     # Google Meet Transcripts have a header which we don't want to be summarized
     header_lines = 5
+    file_text = input_file.readlines()
     head = file_text[:header_lines]
     transcript = "".join(file_text[header_lines:])
     return head, transcript
+def create_meeting_notes(transcript_file):
     # read config variables
+    # if not os.getenv("OPENAI_API_KEY"):
+    #     os.environ["OPENAI_API_KEY"] = config['REQUIRED']['openai-api-key']
+    # transcript_filepath = config['OPTIONAL']['transcript-filepath']
+    # notes_filepath = config['OPTIONAL']['notes-filepath']
+    head, transcript = load_transcript(transcript_file)
     # split the transcript on the 5-min timestamps
+    regex_pattern = r"[0-9]{2}:[0-9]{2}:0{2}"
     five_min_chunks = re.split(regex_pattern, transcript)
     # create a textsplitter to subdivide those chunks into appropriately sized chunks.
+    text_splitter = RecursiveCharacterTextSplitter(chunk_size=2000, chunk_overlap=0)
     # list the meeting time and the chunks associated with it
     timestamped_summaries = []
         sub_chunks = text_splitter.split_text(five_minutes_chunk)
         summaries = []
+        for j, chunk in enumerate(sub_chunks, 1):
             summaries.append(chain.run(chunk))
             print(f"{timestamp}: Chunk {j}/{len(sub_chunks)}")
     first_line = re.split(r"[()]", head[0])
+    # Transcript Notes
+    meeting_notes = f'''# {first_line[0]}
+{first_line[1]}
+## Attendees
+{head[2]}## Meeting Notes
+'''
+    for timestamp, summaries in timestamped_summaries:
+        meeting_notes += f'### {timestamp}\n'
+        for summary in summaries:
+            meeting_notes += f"- {summary.strip()}\n"
+    meeting_notes += "\nEnd of Meeting"
+    return meeting_notes
+    # with open(notes_filepath, 'w+') as f:
+    #     f.write(meeting_notes)
+    # print(f"Export to file {notes_filepath} completed")

requirements.txt CHANGED Viewed

Binary files a/requirements.txt and b/requirements.txt differ