ZainabF committed on
Commit
39e1299
·
1 Parent(s): 8cec68a

Upload 3 files

Browse files
Files changed (3) hide show
  1. app.py +21 -20
  2. requirements.txt +6 -0
  3. summarizer.py +74 -0
app.py CHANGED
@@ -1,26 +1,27 @@
1
  import streamlit as st
 
2
 
3
- st.title("Echo Bot")
4
 
5
- # Initialize chat history
6
- if "messages" not in st.session_state:
7
- st.session_state.messages = []
8
 
9
- # Display chat messages from history on app rerun
10
- for message in st.session_state.messages:
11
- with st.chat_message(message["role"]):
12
- st.markdown(message["content"])
13
 
14
- # React to user input
15
- if prompt := st.chat_input("What is up?"):
16
- # Display user message in chat message container
17
- st.chat_message("user").markdown(prompt)
18
- # Add user message to chat history
19
- st.session_state.messages.append({"role": "user", "content": prompt})
20
 
21
- response = f"Echo: {prompt}"
22
- # Display assistant response in chat message container
23
- with st.chat_message("assistant"):
24
- st.markdown(response)
25
- # Add assistant response to chat history
26
- st.session_state.messages.append({"role": "assistant", "content": response})
 
 
 
 
 
 
 
 
 
 
1
  import streamlit as st
2
+ from summarizer import summarize_article
3
 
 
4
 
5
# Streamlit front end: read an article URL, run the summarizer on it, and
# render the resulting summary.

# Configure the browser tab (title, icon) and use the full page width
st.set_page_config(page_title="Article Summarizer", page_icon="📜", layout="wide")

# Page headings
st.title("Article Summarizer", anchor=False)
st.header("Summarize Articles with AI", anchor=False)

# Input URL (surrounding whitespace is dropped so a blank-looking entry
# does not trigger a run)
st.divider()
url = st.text_input("Enter Article URL", value="").strip()

# Summarize the article
st.divider()
if url:
    summary = None
    with st.status("Processing...", state="running", expanded=True) as status:
        st.write("Summarizing Article...")
        try:
            summary, time_taken = summarize_article(url)
        except Exception as exc:
            # UI boundary: bad URLs and download/model failures are expected
            # with user-supplied input, so report them in the page instead of
            # letting a raw traceback reach the user.
            status.update(label="Failed to summarize article", state="error")
            st.error(f"Could not summarize the article: {exc}")
        else:
            status.update(
                label=f"Finished - Time Taken: {time_taken:.1f} seconds",
                state="complete",
            )

    # Show Summary (only when the run above succeeded)
    if summary is not None:
        st.subheader("Summary:", anchor=False)
        st.write(summary)
requirements.txt ADDED
@@ -0,0 +1,6 @@
 
 
 
 
 
 
 
1
+ streamlit
2
+ langchain
3
+ beautifulsoup4
4
+ ctransformers
5
+ transformers
6
+ newspaper3k
summarizer.py ADDED
@@ -0,0 +1,74 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import os
2
+ import time
3
+ from langchain.chains import MapReduceDocumentsChain, LLMChain, ReduceDocumentsChain, StuffDocumentsChain
4
+ from langchain.document_loaders import NewsURLLoader
5
+ from langchain.llms import CTransformers
6
+ from langchain.prompts import PromptTemplate
7
+ from langchain.text_splitter import RecursiveCharacterTextSplitter
8
+
9
+
10
def summarize_article(article_url):
    """Summarize the news article at ``article_url`` with a map-reduce LLM chain.

    The article is loaded with ``NewsURLLoader``, split into chunks, each chunk
    is reduced to its main points (map step), and the partial results are then
    merged into one consolidated summary (reduce step).

    Args:
        article_url: URL of the article to summarize.

    Returns:
        A ``(summary, time_taken)`` tuple: the final summary text and the
        number of seconds (float) spent running the chain. Loading the
        article and the model happens before the timer starts.

    Raises:
        ValueError: If ``article_url`` is blank, or if no article text could
            be loaded from it.
    """
    if not article_url or not article_url.strip():
        raise ValueError("article_url must be a non-empty URL")
    article_url = article_url.strip()

    # Load article; drop documents without any text so the model is never
    # asked to summarize nothing.
    loader = NewsURLLoader([article_url])
    docs = [doc for doc in loader.load() if doc.page_content.strip()]
    if not docs:
        raise ValueError(f"No article text could be loaded from {article_url!r}")

    # Token budget: the prompt and the generated reply share one context
    # window, so the reply budget must be smaller than the window and the
    # reduce step must leave room for both the reply and the prompt wrapper.
    context_length = 4096
    max_new_tokens = 1024
    prompt_margin = 256

    # Load LLM. The thread count goes into `config` because that dict is what
    # the wrapper hands to ctransformers; `threads` is not a declared field of
    # the LangChain wrapper itself.
    config = {
        'max_new_tokens': max_new_tokens,
        'temperature': 0.7,
        'context_length': context_length,
        'threads': os.cpu_count() or 1,  # cpu_count() may return None
    }
    llm = CTransformers(
        model="TheBloke/Mistral-7B-Instruct-v0.1-GGUF",
        model_file="mistral-7b-instruct-v0.1.Q4_K_M.gguf",
        config=config,
    )

    # Map template and chain. The prompt ends at [/INST]: the end-of-sequence
    # marker </s> belongs after the model's answer, not inside the prompt.
    map_template = """<s>[INST] The following is a part of an article:
{docs}
Based on this, please identify the main points.
Answer: [/INST]"""
    map_prompt = PromptTemplate.from_template(map_template)
    map_chain = LLMChain(llm=llm, prompt=map_prompt)

    # Reduce template and chain
    reduce_template = """<s>[INST] The following is set of summaries from the article:
{doc_summaries}
Take these and distill it into a final, consolidated summary of the main points.
Construct it as a well organized summary of the main points and should be between 3 and 5 paragraphs.
Answer: [/INST]"""
    reduce_prompt = PromptTemplate.from_template(reduce_template)
    reduce_chain = LLMChain(llm=llm, prompt=reduce_prompt)

    # Takes a list of documents, combines them into a single string, and
    # passes this to an LLMChain
    combine_documents_chain = StuffDocumentsChain(
        llm_chain=reduce_chain, document_variable_name="doc_summaries"
    )
    # Combines and iteratively reduces the mapped documents
    reduce_documents_chain = ReduceDocumentsChain(
        # This is the final chain that is called.
        combine_documents_chain=combine_documents_chain,
        # Used when the documents exceed the budget of `StuffDocumentsChain`
        collapse_documents_chain=combine_documents_chain,
        # The maximum number of tokens to group documents into.
        token_max=context_length - max_new_tokens - prompt_margin,
    )
    # Combining documents by mapping a chain over them, then combining results
    map_reduce_chain = MapReduceDocumentsChain(
        # Map chain
        llm_chain=map_chain,
        # Reduce chain
        reduce_documents_chain=reduce_documents_chain,
        # The variable name in the llm_chain to put the documents in
        document_variable_name="docs",
        # Only the final text is returned to the caller, so the per-chunk
        # map outputs are not collected.
        return_intermediate_steps=False,
    )

    # Split documents into chunks (chunk_size is measured in characters)
    text_splitter = RecursiveCharacterTextSplitter(
        chunk_size=4000, chunk_overlap=0
    )
    split_docs = text_splitter.split_documents(docs)

    # Run the chain; perf_counter is monotonic, so the elapsed time cannot be
    # skewed by system clock adjustments.
    start_time = time.perf_counter()
    result = map_reduce_chain(split_docs, return_only_outputs=True)
    time_taken = time.perf_counter() - start_time
    return result['output_text'], time_taken