Spaces:

ZainabF
/

AllyArc

Runtime error

App Files Files Community

ZainabF committed on Jan 5, 2024

Commit

39e1299

1 Parent(s): 8cec68a

Upload 3 files

Browse files

Files changed (3) hide show

app.py +21 -20
requirements.txt +6 -0
summarizer.py +74 -0

app.py CHANGED Viewed

@@ -1,26 +1,27 @@
 import streamlit as st
-st.title("Echo Bot")
-# Initialize chat history
-if "messages" not in st.session_state:
-    st.session_state.messages = []
-# Display chat messages from history on app rerun
-for message in st.session_state.messages:
-    with st.chat_message(message["role"]):
-        st.markdown(message["content"])
-# React to user input
-if prompt := st.chat_input("What is up?"):
-    # Display user message in chat message container
-    st.chat_message("user").markdown(prompt)
-    # Add user message to chat history
-    st.session_state.messages.append({"role": "user", "content": prompt})
-    response = f"Echo: {prompt}"
-    # Display assistant response in chat message container
-    with st.chat_message("assistant"):
-        st.markdown(response)
-    # Add assistant response to chat history
-    st.session_state.messages.append({"role": "assistant", "content": response})

 import streamlit as st
+from summarizer import summarize_article
+# Set page title
+st.set_page_config(page_title="Article Summarizer", page_icon="📜", layout="wide")
+# Set title
+st.title("Article Summarizer", anchor=False)
+st.header("Summarize Articles with AI", anchor=False)
+# Input URL
+st.divider()
+url = st.text_input("Enter Article URL", value="")
+# Download audio
+st.divider()
+if url:
+    with st.status("Processing...", state="running", expanded=True) as status:
+        st.write("Summarizing Article...")
+        summary, time_taken = summarize_article(url)
+        status.update(label=f"Finished - Time Taken: {time_taken} seconds", state="complete")
+    # Show Summary
+    st.subheader("Summary:", anchor=False)
+    st.write(summary)

requirements.txt ADDED Viewed

	@@ -0,0 +1,6 @@

+streamlit
+langchain
+beautifulsoup4
+ctransformers
+transformers
+newspaper3k

summarizer.py ADDED Viewed

	@@ -0,0 +1,74 @@

+import os
+import time
+from langchain.chains import MapReduceDocumentsChain, LLMChain, ReduceDocumentsChain, StuffDocumentsChain
+from langchain.document_loaders import NewsURLLoader
+from langchain.llms import CTransformers
+from langchain.prompts import PromptTemplate
+from langchain.text_splitter import RecursiveCharacterTextSplitter
+def summarize_article(article_url):
+    # Load article
+    loader = NewsURLLoader([article_url])
+    docs = loader.load()
+    # Load LLM
+    config = {'max_new_tokens': 4096, 'temperature': 0.7, 'context_length': 4096}
+    llm = CTransformers(model="TheBloke/Mistral-7B-Instruct-v0.1-GGUF",
+                        model_file="mistral-7b-instruct-v0.1.Q4_K_M.gguf",
+                        config=config,
+                        threads=os.cpu_count())
+    # Map template and chain
+    map_template = """<s>[INST] The following is a part of an article:
+    {docs}
+    Based on this, please identify the main points.
+    Answer:  [/INST] </s>"""
+    map_prompt = PromptTemplate.from_template(map_template)
+    map_chain = LLMChain(llm=llm, prompt=map_prompt)
+    # Reduce template and chain
+    reduce_template = """<s>[INST] The following is set of summaries from the article:
+    {doc_summaries}
+    Take these and distill it into a final, consolidated summary of the main points.
+    Construct it as a well organized summary of the main points and should be between 3 and 5 paragraphs.
+    Answer:  [/INST] </s>"""
+    reduce_prompt = PromptTemplate.from_template(reduce_template)
+    reduce_chain = LLMChain(llm=llm, prompt=reduce_prompt)
+    # Takes a list of documents, combines them into a single string, and passes this to an LLMChain
+    combine_documents_chain = StuffDocumentsChain(
+        llm_chain=reduce_chain, document_variable_name="doc_summaries"
+    )
+    # Combines and iteratively reduces the mapped documents
+    reduce_documents_chain = ReduceDocumentsChain(
+        # This is final chain that is called.
+        combine_documents_chain=combine_documents_chain,
+        # If documents exceed context for `StuffDocumentsChain`
+        collapse_documents_chain=combine_documents_chain,
+        # The maximum number of tokens to group documents into.
+        token_max=4000,
+    )
+    # Combining documents by mapping a chain over them, then combining results
+    map_reduce_chain = MapReduceDocumentsChain(
+        # Map chain
+        llm_chain=map_chain,
+        # Reduce chain
+        reduce_documents_chain=reduce_documents_chain,
+        # The variable name in the llm_chain to put the documents in
+        document_variable_name="docs",
+        # Return the results of the map steps in the output
+        return_intermediate_steps=True,
+    )
+    # Split documents into chunks
+    text_splitter = RecursiveCharacterTextSplitter(
+        chunk_size=4000, chunk_overlap=0
+    )
+    split_docs = text_splitter.split_documents(docs)
+    # Run the chain
+    start_time = time.time()
+    result = map_reduce_chain.__call__(split_docs, return_only_outputs=True)
+    time_taken = time.time() - start_time
+    return result['output_text'], time_taken