Vijayanand Sankarasubramanian committed on
Commit
240ad82
·
1 Parent(s): 0dbead4

updated UI to get constants

Browse files
.gitignore CHANGED
@@ -178,7 +178,6 @@ cython_debug/
178
  #downloaded deb files
179
  *.deb
180
 
181
- #cached embeddings
182
  cache
183
-
184
  flagged
 
 
178
  #downloaded deb files
179
  *.deb
180
 
 
181
  cache
 
182
  flagged
183
+ *.rtf
app.py CHANGED
@@ -1,67 +1,108 @@
1
  import gradio as gr
2
- from helpers.utils import load_rtf_document_and_chunk
3
- from helpers.summarize import summarize_with_map_reduce
4
- from helpers.model_utils import get_model
5
- from aspect_and_sentiment_extraction import extract_aspects_and_sentiment
6
- from answer_bot import answer_question
7
 
8
- transcript_file_name = None
9
-
10
- def summarize(transcript_file_name):
11
- chunked_docs = load_rtf_document_and_chunk(transcript_file_name)
12
-
13
- llm = get_model("OPENAI")
14
- return summarize_with_map_reduce(chunked_docs, llm)
15
-
16
- def extract_aspects(transcript_file_name):
17
- # Implement your aspect extraction and sentiment analysis logic here
18
- return extract_aspects_and_sentiment(transcript_file_name)
19
-
20
-
21
- def get_answer_for(user_question):
22
  if transcript_file_name is None:
23
  return "No Transcript Uploaded, Upload RTF File First", ""
 
 
24
 
25
  # Answer the user's question using the question-answering model
26
  if user_question.strip(): # Ensure there is a question provided
27
- answer_text = answer_question(question=user_question)
28
  else:
29
  answer_text = "No question asked."
30
 
31
- return answer_text.lstrip()
32
 
33
- def process_transcript(uploaded_file):
34
  if transcript_file_name is None:
35
  return "No Transcript Uploaded, Upload RTF File First", ""
36
 
37
  # Summarize the content
38
- summary = summarize(transcript_file_name=transcript_file_name).lstrip()
 
 
39
 
 
 
 
 
40
  # Aspect-Based Sentiment Analysis
41
- sentiment = extract_aspects(transcript_file_name=transcript_file_name).lstrip()
42
 
43
- return summary, sentiment
44
 
45
- def setup_rtf_file_handle(uploaded_file):
 
 
46
  transcript_file_name = uploaded_file.name
47
- print(f"Transcript File Name :{transcript_file_name}")
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
48
 
49
  with gr.Blocks() as demo:
 
 
 
50
  with gr.Group("Upload RTF File"):
51
  rtf_file = gr.File(label="Podcast Transcript RTF file")
52
  submit_button = gr.Button("Upload File")
53
- submit_button.click(setup_rtf_file_handle)
54
- with gr.Group("Aspects and Sentiment of Podcast"):
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
55
  summary = gr.Textbox(label="Summary of Podcast")
 
 
 
56
  sentiment = gr.Textbox(label="Aspect Based Sentiments")
57
  submit_button = gr.Button("Generate Aspects and Summary")
58
- submit_button.click(process_transcript, inputs=rtf_file, outputs=[summary, sentiment])
59
-
60
  with gr.Group("Question/Answer"):
61
  gr.Markdown("Question/Answer")
62
  question = gr.Textbox(label="Question")
63
  answer = gr.Textbox(label="Answer")
64
  answer_button = gr.Button("Answer Question")
65
- answer_button.click(get_answer_for, inputs= question, outputs=answer)
66
 
67
  demo.launch()
 
1
  import gradio as gr
2
+ from helpers.model_utils import GPT3, GPT4, LLAMA3, ANTHROPIC, set_question_answer_llm, set_sentiment_analysis_llm, set_summarization_llm
3
+ from tools.summarize import MAPREDUCE, STUFF, summarize_podcast
4
+ from tools.answer_bot import answer_question
5
+ from tools.aspect_and_sentiment_extraction import extract_aspects_and_sentiment
 
6
 
7
def get_answer_for(user_question, transcript_file_name, llm_choice):
    """Answer a user question about the uploaded podcast transcript.

    Always returns a 3-tuple (answer_text, transcript_file_name, llm_choice)
    because the Gradio click handler binds three output components; the
    original error paths returned a 2-tuple / bare string, which mismatched
    the wired outputs.
    """
    # Guard: a transcript must be uploaded before questions can be answered.
    if transcript_file_name is None:
        return "No Transcript Uploaded, Upload RTF File First", transcript_file_name, llm_choice
    # Guard: a question must be supplied.
    if user_question is None:
        return "Question Not Given", transcript_file_name, llm_choice

    # Answer the user's question using the question-answering model
    if user_question.strip():  # Ensure there is a non-blank question provided
        answer_text = answer_question(question=user_question, transcript_file_name=transcript_file_name, llm_choice=llm_choice)
    else:
        answer_text = "No question asked."

    return answer_text.lstrip(), transcript_file_name, llm_choice
20
 
21
def summarize(uploaded_file, transcript_file_name, summarization_method, llm_choice):
    """Summarize the uploaded podcast transcript.

    Always returns a 4-tuple (summary, transcript_file_name,
    summarization_method, llm_choice) to match the four Gradio output
    components wired to this handler; the original error path returned only
    a 2-tuple.
    """
    # Guard: a transcript must be uploaded first.
    if transcript_file_name is None:
        return "No Transcript Uploaded, Upload RTF File First", transcript_file_name, summarization_method, llm_choice

    # Summarize the content
    summary = summarize_podcast(transcript_file_name=transcript_file_name, summarization_method=summarization_method, llm_choice=llm_choice).lstrip()

    return summary, transcript_file_name, summarization_method, llm_choice
29
 
30
def generate_aspects_and_sentiments(uploaded_file, transcript_file_name, llm_choice):
    """Run aspect-based sentiment analysis on the uploaded transcript.

    Always returns a 3-tuple (sentiment, transcript_file_name, llm_choice)
    to match the three Gradio output components wired to this handler; the
    original error path returned only a 2-tuple.
    """
    # Guard: a transcript must be uploaded first.
    if transcript_file_name is None:
        return "No Transcript Uploaded, Upload RTF File First", transcript_file_name, llm_choice

    # Aspect-Based Sentiment Analysis
    sentiment = extract_aspects_and_sentiment(transcript_file_name=transcript_file_name, llm_choice=llm_choice).lstrip()

    return sentiment, transcript_file_name, llm_choice
38
 
39
def setup_rtf_file_handle(uploaded_file, transcript_file_name):
    """Record the path of the uploaded RTF file.

    Returns the uploaded file's name (stored into the transcript_file_name
    Gradio state), or None when nothing was uploaded.
    """
    return uploaded_file.name if uploaded_file else None
44
+
45
def setup_summarization_llm(choice, llm_choice):
    """Apply the selected summarization LLM and echo the selection.

    Returns (choice, choice): the first value refreshes the display textbox,
    the second updates the llm_choice Gradio state.
    """
    set_summarization_llm(choice)
    return choice, choice
49
+
50
def setup_sentiment_analysis_llm(choice, llm_choice):
    """Apply the selected sentiment-analysis LLM and echo the selection.

    Returns (choice, choice): the first value refreshes the display textbox,
    the second updates the llm_choice Gradio state.
    """
    set_sentiment_analysis_llm(choice)
    return choice, choice
54
+
55
def setup_question_answer_llm(choice, llm_choice):
    """Apply the selected question-answering LLM and echo the selection.

    Returns (choice, choice): the first value refreshes the display textbox,
    the second updates the llm_choice Gradio state.
    """
    set_question_answer_llm(choice)
    return choice, choice
59
+
60
def setup_summarization_method(choice, summarization_method):
    """Echo the selected summarization method.

    Returns (choice, choice): the first value refreshes the display textbox,
    the second updates the summarization_method Gradio state.
    """
    return choice, choice
63
+
64
+
65
# Choices surfaced in the UI radio groups.
llm_choices = [GPT3, GPT4, LLAMA3, ANTHROPIC]
summarize_method_choices = [MAPREDUCE, STUFF]

with gr.Blocks() as demo:
    # Session state shared by all handlers below.
    transcript_file_name = gr.State()
    summarization_method = gr.State()
    llm_choice = gr.State()
    with gr.Group("Upload RTF File"):
        rtf_file = gr.File(label="Podcast Transcript RTF file")
        submit_button = gr.Button("Upload File")
        submit_button.click(setup_rtf_file_handle, inputs=[rtf_file, transcript_file_name], outputs=transcript_file_name)
    with gr.Group("LLM Selection"):
        with gr.Row():
            choice = gr.Radio(label="Summarization LLM", choices=llm_choices, value=GPT3)
            output = gr.Textbox(label="", value=GPT3)
            choice.change(setup_summarization_llm, inputs=[choice, llm_choice], outputs=[output, llm_choice])
        with gr.Row():
            choice = gr.Radio(label="Sentiment Analysis LLM", choices=llm_choices, value=GPT3)
            output = gr.Textbox(label="", value=GPT3)
            # BUG FIX: this radio was wired to setup_summarization_llm.
            choice.change(setup_sentiment_analysis_llm, inputs=[choice, llm_choice], outputs=[output, llm_choice])
        with gr.Row():
            choice = gr.Radio(label="Question/Answer LLM", choices=llm_choices, value=GPT3)
            output = gr.Textbox(label="", value=GPT3)
            # BUG FIX: this radio was wired to setup_summarization_llm.
            choice.change(setup_question_answer_llm, inputs=[choice, llm_choice], outputs=[output, llm_choice])
    with gr.Group("Summarization Method"):
        choice = gr.Radio(label="Summarization Method", choices=summarize_method_choices, value=MAPREDUCE)
        output = gr.Textbox(label="", value=MAPREDUCE)
        choice.change(setup_summarization_method, inputs=[choice, summarization_method], outputs=[output, summarization_method])
    with gr.Group("Summarize Podcast"):
        summary = gr.Textbox(label="Summary of Podcast")
        submit_button = gr.Button("Generate Summary")
        submit_button.click(summarize, inputs=[rtf_file, transcript_file_name, summarization_method, llm_choice], outputs=[summary, transcript_file_name, summarization_method, llm_choice])
    with gr.Group("Aspects and Sentiment of Podcast"):
        sentiment = gr.Textbox(label="Aspect Based Sentiments")
        submit_button = gr.Button("Generate Aspects and Summary")
        submit_button.click(generate_aspects_and_sentiments, inputs=[rtf_file, transcript_file_name, llm_choice], outputs=[sentiment, transcript_file_name, llm_choice])
    with gr.Group("Question/Answer"):
        gr.Markdown("Question/Answer")
        question = gr.Textbox(label="Question")
        answer = gr.Textbox(label="Answer")
        answer_button = gr.Button("Answer Question")
        answer_button.click(get_answer_for, inputs=[question, transcript_file_name, llm_choice], outputs=[answer, transcript_file_name, llm_choice])

demo.launch()
helpers/import_envs.py CHANGED
@@ -1,9 +1,6 @@
1
  import os
2
  from dotenv import load_dotenv
3
- llm_model="OPENAI"
4
- # llm_model="LLAMA3"
5
- # llm_model_NAME="CLAUDE"
6
- rtf_file = "./data/Tim_O_Reilly_Podcast_text.rtf"
7
  index_name = "podcast_oracle_index"
8
  index_file = f"./{index_name}/index.faiss"
9
 
 
1
  import os
2
  from dotenv import load_dotenv
3
+
 
 
 
4
  index_name = "podcast_oracle_index"
5
  index_file = f"./{index_name}/index.faiss"
6
 
helpers/model_utils.py CHANGED
@@ -1,26 +1,41 @@
1
- from helpers.import_envs import llm_model, openai_api_key, anthropic_api_key
2
  from langchain_openai import OpenAI
3
  from langchain_anthropic import ChatAnthropic
4
  from langchain_community.llms import Ollama
 
5
 
6
- default_model = OpenAI(temperature=0, api_key=openai_api_key)
 
 
 
 
7
 
8
- def get_model(model_override = None):
9
- if model_override is not None:
10
- model_str = model_override
 
 
 
 
 
 
 
 
 
 
 
 
 
11
  else:
12
- model_str = llm_model
13
- if model_str == "OPENAI":
14
- llm = default_model
15
- print(f"Model Name: {llm.model_name}");
16
- elif model_str == "CLAUDE":
17
- llm = ChatAnthropic(model_name="claude-2.1", anthropic_api_key=anthropic_api_key)
18
- print(f"Model Name: {llm.model}");
19
- elif model_str == "LLAMA3":
20
- # Now you can use `llm` for generating responses, etc.
21
- llm = Ollama(model="llama3")
22
- print(f"Model Name: {llm.model}");
23
- else:
24
- llm = default_model
25
- print(f"Model Name: {llm.model_name}");
26
  return llm
 
 
 
 
 
 
 
 
 
 
 
 
1
  from langchain_openai import OpenAI
2
  from langchain_anthropic import ChatAnthropic
3
  from langchain_community.llms import Ollama
4
+ from helpers.import_envs import openai_api_key, anthropic_api_key
5
 
6
# Display names for the selectable LLM backends. These exact strings are the
# values shown in (and returned by) the Gradio radio buttons, so they must
# stay in sync with app.py's llm_choices list.
GPT3 = "gpt-3.5"
GPT3_INSTRUCT = "gpt-3.5-instruct"
GPT4 = "gpt-4o"
LLAMA3 = "Llama3"
ANTHROPIC = "Claude2"
11
 
12
def _set_llm_based_on_choice(choice):
    """Instantiate the LangChain LLM that matches a UI choice constant.

    Falls back to gpt-3.5-turbo when the choice is None or unrecognized.
    """
    if choice == GPT3_INSTRUCT:
        llm = OpenAI(model="gpt-3.5-turbo-instruct", temperature=0, api_key=openai_api_key)
    elif choice == GPT3:
        llm = OpenAI(model="gpt-3.5-turbo", temperature=0, api_key=openai_api_key)
    elif choice == GPT4:
        llm = OpenAI(model="gpt-4o", temperature=0, api_key=openai_api_key)
    elif choice == ANTHROPIC:
        # BUG FIX: the Anthropic model name was misspelled "clause-2.1",
        # which would be rejected by the API.
        llm = ChatAnthropic(model_name="claude-2.1", anthropic_api_key=anthropic_api_key)
    elif choice == LLAMA3:
        llm = Ollama(model="llama3")
    else:
        # Default backend when no valid choice was supplied.
        llm = OpenAI(model="gpt-3.5-turbo", temperature=0, api_key=openai_api_key)
    return llm
32
+
33
def set_summarization_llm(choice = None):
    """Build and return the LLM used for summarization."""
    return _set_llm_based_on_choice(choice)

def set_sentiment_analysis_llm(choice = None):
    """Build and return the LLM used for aspect/sentiment analysis."""
    return _set_llm_based_on_choice(choice)

def set_question_answer_llm(choice = None):
    """Build and return the LLM used for question answering."""
    return _set_llm_based_on_choice(choice)
41
+
helpers/utils.py CHANGED
@@ -4,7 +4,7 @@ from langchain_openai import OpenAIEmbeddings
4
  from langchain.storage import LocalFileStore
5
  from langchain.embeddings import CacheBackedEmbeddings
6
  from langchain_community.vectorstores import FAISS
7
- from helpers.import_envs import openai_api_key, rtf_file, index_file, index_name
8
  import pypandoc
9
 
10
  def load_rtf_document(file_path):
@@ -46,8 +46,8 @@ def embed_chunks(chunked_docs):
46
  vector_store.save_local(folder_path=index_name)
47
  return vector_store
48
 
49
- def create_or_load_vectore_store():
50
- chunked_docs = load_rtf_document_and_chunk(file_path=rtf_file)
51
 
52
  embedding_model = OpenAIEmbeddings(
53
  model="text-embedding-3-large", api_key=openai_api_key
 
4
  from langchain.storage import LocalFileStore
5
  from langchain.embeddings import CacheBackedEmbeddings
6
  from langchain_community.vectorstores import FAISS
7
+ from helpers.import_envs import openai_api_key, index_file, index_name
8
  import pypandoc
9
 
10
  def load_rtf_document(file_path):
 
46
  vector_store.save_local(folder_path=index_name)
47
  return vector_store
48
 
49
+ def create_or_load_vectore_store(transcript_file_name):
50
+ chunked_docs = load_rtf_document_and_chunk(file_path=transcript_file_name)
51
 
52
  embedding_model = OpenAIEmbeddings(
53
  model="text-embedding-3-large", api_key=openai_api_key
{helpers → tools}/__init__.py RENAMED
File without changes
answer_bot.py → tools/answer_bot.py RENAMED
@@ -4,10 +4,13 @@ from langchain_openai import ChatOpenAI
4
  from langchain.schema import StrOutputParser
5
  from langchain_core.runnables.passthrough import RunnablePassthrough
6
  from langchain.prompts import ChatPromptTemplate
 
 
 
 
7
 
8
- def answer_question(question):
9
  # Specify the path to the file you want to check
10
- vector_store = create_or_load_vectore_store()
11
 
12
  # create a prompt template to send to our LLM that will incorporate the documents from our retriever with the
13
  # question we ask the chat model
@@ -20,7 +23,7 @@ def answer_question(question):
20
 
21
  # create a chat model / LLM
22
  chat_model = ChatOpenAI(
23
- model="gpt-3.5-turbo", temperature=0, api_key=openai_api_key
24
  )
25
 
26
  # create a parser to parse the output of our LLM
@@ -38,6 +41,3 @@ def answer_question(question):
38
  answer = runnable_chain.invoke(question)
39
  print(answer)
40
  return answer
41
-
42
- # question = "What is the opinion of the speaker on open source?"
43
- # answer_question(question)
 
4
  from langchain.schema import StrOutputParser
5
  from langchain_core.runnables.passthrough import RunnablePassthrough
6
  from langchain.prompts import ChatPromptTemplate
7
+ from helpers.model_utils import set_question_answer_llm
8
+
9
+ def answer_question(question, transcript_file_name, llm_choice=None):
10
+ question_answer_llm = set_question_answer_llm(llm_choice)
11
 
 
12
  # Specify the path to the file you want to check
13
+ vector_store = create_or_load_vectore_store(transcript_file_name=transcript_file_name)
14
 
15
  # create a prompt template to send to our LLM that will incorporate the documents from our retriever with the
16
  # question we ask the chat model
 
23
 
24
  # create a chat model / LLM
25
  chat_model = ChatOpenAI(
26
+ model=question_answer_llm.model_name, temperature=0, api_key=openai_api_key
27
  )
28
 
29
  # create a parser to parse the output of our LLM
 
41
  answer = runnable_chain.invoke(question)
42
  print(answer)
43
  return answer
 
 
 
aspect_and_sentiment_extraction.py → tools/aspect_and_sentiment_extraction.py RENAMED
@@ -1,9 +1,9 @@
1
  from helpers.import_envs import openai_api_key
2
- from helpers.import_envs import rtf_file
3
  from langchain.prompts import PromptTemplate
4
  from langchain_openai import ChatOpenAI
5
  from langchain_core.runnables.passthrough import RunnablePassthrough
6
  from langchain.schema import StrOutputParser
 
7
  import re
8
 
9
  # Define the function to clean and extract text from RTF content
@@ -14,7 +14,8 @@ def extract_text_from_rtf(rtf_str):
14
  plain_text = plain_text.replace('\n', ' ').replace('\r', '')
15
  return plain_text
16
 
17
- def extract_aspects_and_sentiment(transcript_file_name):
 
18
  # Read the RTF file content
19
  with open(transcript_file_name, 'r') as file:
20
  rtf_content = file.read()
@@ -33,7 +34,7 @@ def extract_aspects_and_sentiment(transcript_file_name):
33
 
34
  # create a chat model / LLM
35
  chat_model = ChatOpenAI(
36
- model="gpt-3.5-turbo", temperature=0, api_key=openai_api_key
37
  )
38
 
39
  # create a parser to parse the output of our LLM
@@ -50,5 +51,3 @@ def extract_aspects_and_sentiment(transcript_file_name):
50
  answer = runnable_chain.invoke(document_text)
51
  print(answer)
52
  return answer
53
-
54
- # extract_aspects_and_sentiment(rtf_file)
 
1
  from helpers.import_envs import openai_api_key
 
2
  from langchain.prompts import PromptTemplate
3
  from langchain_openai import ChatOpenAI
4
  from langchain_core.runnables.passthrough import RunnablePassthrough
5
  from langchain.schema import StrOutputParser
6
+ from helpers.model_utils import set_sentiment_analysis_llm, GPT3
7
  import re
8
 
9
  # Define the function to clean and extract text from RTF content
 
14
  plain_text = plain_text.replace('\n', ' ').replace('\r', '')
15
  return plain_text
16
 
17
+ def extract_aspects_and_sentiment(transcript_file_name, llm_choice = None):
18
+ sentiment_analysis_llm = set_sentiment_analysis_llm(llm_choice)
19
  # Read the RTF file content
20
  with open(transcript_file_name, 'r') as file:
21
  rtf_content = file.read()
 
34
 
35
  # create a chat model / LLM
36
  chat_model = ChatOpenAI(
37
+ model=sentiment_analysis_llm.model_name, temperature=0, api_key=openai_api_key
38
  )
39
 
40
  # create a parser to parse the output of our LLM
 
51
  answer = runnable_chain.invoke(document_text)
52
  print(answer)
53
  return answer
 
 
{helpers → tools}/summarize.py RENAMED
@@ -1,5 +1,11 @@
1
  from langchain.chains.summarize import load_summarize_chain
2
  from helpers.prompts import BULLET_POINT_PROMPT
 
 
 
 
 
 
3
 
4
  """
5
  This method involves an initial prompt on each chunk of data * ( for summarization tasks, this could be a summary
@@ -17,8 +23,17 @@ def run_chain(chain, docs):
17
  summary = output['output_text']
18
  print(summary)
19
  return summary
 
 
 
 
 
 
 
 
20
 
21
- def summarize_with_map_reduce(docs, llm):
 
22
  chain = load_summarize_chain(llm=llm, chain_type="map_reduce", verbose=False)
23
 
24
  # prompt used by the chain for summarizing each part
@@ -29,24 +44,26 @@ def summarize_with_map_reduce(docs, llm):
29
  # print("prompt used by the chain for combining the parts:")
30
  # print(chain.combine_document_chain.llm_chain.prompt.template)
31
 
32
- return run_chain(chain=chain, docs=docs)
33
 
34
- def summarize_with_map_reduce_and_bullet_point_prompt(docs, llm):
 
35
  chain = load_summarize_chain(
36
- llm,
37
  chain_type="map_reduce",
38
  map_prompt=BULLET_POINT_PROMPT,
39
  combine_prompt=BULLET_POINT_PROMPT,
40
  )
41
 
42
- return run_chain(chain=chain, docs=docs)
43
 
44
 
45
  """
46
  Stuffing is the simplest method, whereby you simply stuff all the related data into the prompt as context to pass to
47
  the language model. This is implemented in LangChain as the StuffDocumentsChain.
48
  Pros: Only makes a single call to the LLM. When generating text, the LLM has access to all the data at once.
49
- Cons: Most LLMs have a context length, and for large documents (or many documents) this will not work as it will
 
50
  result in a prompt larger than the context length.
51
 
52
  The main downside of this method is that it only works one smaller pieces of data. Once you are working with many
@@ -54,9 +71,22 @@ pieces of data, this approach is no longer feasible. The next two approaches are
54
  """
55
 
56
 
57
- def summarize_with_stuff_chain(docs, llm):
58
- chain = load_summarize_chain(llm, chain_type="stuff")
 
59
  return run_chain(chain=chain, docs=docs)
60
 
61
- # chain = load_summarize_chain(llm, chain_type="stuff", prompt=BULLET_POINT_PROMPT)
62
  # run_chain(chain=chain, docs=docs)
 
 
 
 
 
 
 
 
 
 
 
 
 
1
  from langchain.chains.summarize import load_summarize_chain
2
  from helpers.prompts import BULLET_POINT_PROMPT
3
+ from helpers.utils import load_rtf_document_and_chunk, load_rtf_document
4
+ from helpers.model_utils import set_summarization_llm, GPT3_INSTRUCT
5
+
6
# Summarization strategy identifiers surfaced in the UI radio group.
MAPREDUCE="map-reduce"
STUFF="stuff"
# NOTE(review): summarize_podcast receives the method as a parameter, so this
# module-level default appears unused — confirm before removing.
summarization_method = MAPREDUCE
9
 
10
  """
11
  This method involves an initial prompt on each chunk of data * ( for summarization tasks, this could be a summary
 
23
  summary = output['output_text']
24
  print(summary)
25
  return summary
26
+ # prompt used by the chain for summarizing each part
27
+ # print("prompt used by the chain for summarizing each part:")
28
+ # print(chain.llm_chain.prompt.template)
29
+
30
+ # prompt used by the chain for combining the parts
31
+ # print("prompt used by the chain for combining the parts:")
32
+ # print(chain.combine_document_chain.llm_chain.prompt.template)
33
+
34
 
35
def _summarize_with_map_reduce(transcript_file_name, llm):
    """Summarize using the map-reduce strategy: each chunk is summarized
    independently (map), then the partial summaries are combined (reduce).
    """
    docs = load_rtf_document_and_chunk(transcript_file_name)
    # The chain's prompts can be inspected via chain.llm_chain.prompt.template
    # (per-chunk) and chain.combine_document_chain.llm_chain.prompt.template
    # (combine step).
    map_reduce_chain = load_summarize_chain(llm=llm, chain_type="map_reduce", verbose=False)
    return run_chain(chain=map_reduce_chain, docs=docs)
48
 
49
def _summarize_with_map_reduce_and_bullet_point_prompt(transcript_file_name, llm):
    """Map-reduce summarization that forces bullet-point output by applying
    BULLET_POINT_PROMPT to both the map and the combine steps.
    """
    docs = load_rtf_document_and_chunk(transcript_file_name)
    bullet_chain = load_summarize_chain(
        llm=llm,
        chain_type="map_reduce",
        map_prompt=BULLET_POINT_PROMPT,
        combine_prompt=BULLET_POINT_PROMPT,
    )
    return run_chain(chain=bullet_chain, docs=docs)
59
 
60
 
61
  """
62
  Stuffing is the simplest method, whereby you simply stuff all the related data into the prompt as context to pass to
63
  the language model. This is implemented in LangChain as the StuffDocumentsChain.
64
  Pros: Only makes a single call to the LLM. When generating text, the LLM has access to all the data at once.
65
+ Cons: Most LLMs have a context length, and for large documents (or many documents) this will not work as it will
67
  result in a prompt larger than the context length.
68
 
69
  The main downside of this method is that it only works one smaller pieces of data. Once you are working with many
 
71
  """
72
 
73
 
74
def _summarize_with_stuff_chain(transcript_file_name, llm):
    """Summarize by "stuffing" the entire document into one prompt.

    Single LLM call; only suitable when the transcript fits in the model's
    context window.
    """
    document = load_rtf_document(transcript_file_name)
    stuff_chain = load_summarize_chain(llm=llm, chain_type="stuff")
    return run_chain(chain=stuff_chain, docs=document)
78
 
79
+ # chain = load_summarize_chain(llm=llm, chain_type="stuff", prompt=BULLET_POINT_PROMPT)
80
  # run_chain(chain=chain, docs=docs)
81
+
82
+
83
def summarize_podcast(transcript_file_name, summarization_method = None, llm_choice = None):
    """Summarize a podcast transcript with the requested strategy and LLM.

    Defaults: map-reduce when summarization_method is None/unknown, and
    GPT3_INSTRUCT when llm_choice is None.
    """
    # BUG FIX: the caller's llm_choice was unconditionally overwritten with
    # GPT3_INSTRUCT ("# override model"), silently ignoring the LLM selected
    # in the UI. Now the override is only a default for a missing choice.
    if llm_choice is None:
        llm_choice = GPT3_INSTRUCT
    llm = set_summarization_llm(llm_choice)
    if summarization_method == STUFF:
        return _summarize_with_stuff_chain(transcript_file_name=transcript_file_name, llm=llm)
    # MAPREDUCE is both the explicit path and the fallback for unknown methods.
    return _summarize_with_map_reduce(transcript_file_name=transcript_file_name, llm=llm)