ferguch9 commited on
Commit
4defedc
·
1 Parent(s): 0604510

feat: rewrite using model and tokenizer

Browse files
Files changed (2) hide show
  1. app.py +38 -37
  2. data.txt +1 -1
app.py CHANGED
@@ -1,49 +1,36 @@
1
  import streamlit as st
2
  from transformers import pipeline
3
- from transformers import AutoTokenizer
 
 
4
 
5
@st.cache_resource
def load_model():
    """Build and cache the BART-large-CNN summarization pipeline.

    Decorated with ``st.cache_resource`` so the heavyweight model is
    constructed only once per Streamlit session and reused across reruns.
    """
    return pipeline("summarization", model="facebook/bart-large-cnn")
9
 
 
 
 
 
10
 
11
def generate_chunks(inp_str, max_chunk=500):
    """Split text into word-count-limited chunks on sentence boundaries.

    The text is cut into sentences at '.', '?' and '!' and the sentences
    are greedily packed into chunks so each chunk stays within the
    summarizer's input budget.

    Args:
        inp_str: Text to split.
        max_chunk: Maximum number of whitespace-delimited words per chunk.
            Defaults to 500, matching the previously hard-coded limit.

    Returns:
        list[str]: Chunks of the input text, in original order.
    """
    # Tag each sentence-ending punctuation mark with a sentinel so the
    # text can be split without losing the punctuation itself.
    for punct in ('.', '?', '!'):
        inp_str = inp_str.replace(punct, punct + '<eos>')

    sentences = inp_str.split('<eos>')
    chunks = []
    for sentence in sentences:
        words = sentence.split(' ')
        # Extend the current (last) chunk while it fits the word budget;
        # otherwise start a new chunk with this sentence.
        if chunks and len(chunks[-1]) + len(words) <= max_chunk:
            chunks[-1].extend(words)
        else:
            chunks.append(words)

    return [' '.join(chunk) for chunk in chunks]
33
-
34
-
35
# Cached summarization pipeline (see load_model above).
summarizer = load_model()

st.title('Argo AI Summarisation')

st.sidebar.title('Options')
# Generation length bounds passed straight to the summarizer call below.
# NOTE(review): 'max' and 'min' shadow the Python builtins of the same
# name — consider renaming (e.g. max_len / min_len) in a follow-up.
max = st.sidebar.slider('Max Length', 50, 1000, step=10, value=500)
min = st.sidebar.slider('Min Length', 10, 500, step=10, value=100)

# Three input modes; only plain text is implemented in this version.
textTab, docTab, audioTab = st.tabs(["Plain Text", "Text Document", "Audio File"])

with textTab:
    # User-provided text to summarise.
    sentence = st.text_area('Paste text to be summarised:', help='Paste text into text area and hit Summarise button', height=300)

with docTab:
    st.text("Yet to be implemented...")
@@ -56,10 +43,24 @@ st.divider()
56
 
57
with st.spinner("Generating Summary..."):
    # Only run when the user pressed the button and supplied some text.
    if button and sentence:
        # Break the input into summarizer-sized pieces, summarise each,
        # and stitch the per-chunk summaries back together.
        pieces = generate_chunks(sentence)
        results = summarizer(pieces,
                             max_length=max,
                             min_length=min,
                             do_sample=False)
        st.write(' '.join(item['summary_text'] for item in results))
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
  import streamlit as st
2
  from transformers import pipeline
3
+ from transformers import AutoTokenizer, AutoModelForSeq2SeqLM
4
+
5
# Hugging Face checkpoint shared by the model and its tokenizer.
checkpoint = "facebook/bart-large-cnn"

@st.cache_resource
def load_model():
    """Load the BART seq2seq model once and cache it across Streamlit reruns."""
    return AutoModelForSeq2SeqLM.from_pretrained(checkpoint)
11
 
12
@st.cache_resource
def load_tokenizer():
    """Load the tokenizer matching ``checkpoint`` once and cache it."""
    return AutoTokenizer.from_pretrained(checkpoint)
16
 
17
# Cached model and tokenizer (see load_model / load_tokenizer above).
model = load_model()
tokenizer = load_tokenizer()

st.title('Summarisation Tool')
# Fixed typo: "audit" -> "audio" (the tabs below offer text and audio input).
st.write(f"Performs basic summarisation of text and audio using the '{checkpoint}' model.")

st.sidebar.title('Options')
# How aggressively to condense the input; mapped to min/max new-token
# budgets when generating the summary.
summary_balance = st.sidebar.select_slider(
    'Output Summarisation Detail:',
    options=['concise', 'balanced', 'full'],
    value='balanced')

# Three input modes; only plain text is implemented in this version.
textTab, docTab, audioTab = st.tabs(["Plain Text", "Text Document", "Audio File"])

with textTab:
    # User-provided text to summarise, with live size statistics so the
    # user can gauge the summarisation ratio afterwards.
    sentence = st.text_area('Paste text to be summarised:', help='Paste text into text area and hit Summarise button', height=300)
    st.write(f"{len(sentence)} characters and {len(sentence.split())} words")

with docTab:
    st.text("Yet to be implemented...")
 
43
 
44
with st.spinner("Generating Summary..."):
    # Only run when the user pressed the button and supplied some text.
    if button and sentence:
        # Derive summary-length bounds from the input's word count.
        # NOTE(review): these word-based counts are passed as *token*
        # budgets to generate(); BART tokens are sub-word units, so the
        # effective ratios differ somewhat — confirm this is intended.
        text_words = len(sentence.split())
        if summary_balance == 'concise':
            min_multiplier = text_words * 0.1
            max_multiplier = text_words * 0.3
        elif summary_balance == 'full':
            min_multiplier = text_words * 0.5
            max_multiplier = text_words * 0.8
        else:  # 'balanced'
            min_multiplier = text_words * 0.2
            max_multiplier = text_words * 0.5
        min_tokens = int(min_multiplier)
        max_tokens = int(max_multiplier)

        # Tokenize (truncating over-long inputs), generate greedily
        # (do_sample=False), and decode the single returned sequence.
        # Removed: unused 'chunks = [sentence]' and a leftover debug print.
        inputs = tokenizer([sentence], max_length=2048, return_tensors='pt', truncation=True)
        summary_ids = model.generate(inputs['input_ids'], min_new_tokens=min_tokens, max_new_tokens=max_tokens, do_sample=False)
        summary = tokenizer.decode(summary_ids[0], skip_special_tokens=True)
        st.write(summary)
        st.write(f"{len(summary)} characters and {len(summary.split())} words")
data.txt CHANGED
@@ -1 +1 @@
1
- One of the big AI research conferences has been NeurIPS each December, which has been doubling attendance every year. But Nvidia’s GPU Technology Conference (GTC) is another conference that has drawn big crowds. Because the conference has been turned virtual, as have all conferences during the pandemic, it’s now easier to gather AI researchers from all over the world in one event. And Nvidia has gathered the top names in AI for this year’s GTC. Those top names in AI research include Geoffrey Hinton, an Emeritus Professor from the University of Toronto, VP and Engineering Fellow at Google, and Chief Scientific Adviser to the Vector Institute. He is known for his pioneering work on Artificial Neural Networks (ANNs). His research on the backpropagation algorithm brought about drastic improvements in the performance of deep learning models. Hinton won the 2018 Turing Award for his groundbreaking work around deep neural networks, which he shares with Yann LeCun and Yoshua Bengio, both of whom are also speakers at GTC 2021. Yann LeCun is VP and Chief AI Scientist at Facebook and Silver Professor at New York University and considered the founding father of Convolutional Neural Networks (CNNs). He has won the 2014 IEEE Neural Network Pioneer Award and the 2015 PAMI Distinguished Researcher Award, as well as the aforementioned Turing Award. Yoshua Bengio is a Full Professor, and the Founder and Scientific Director at the University of Montreal and the Mila - Quebec Artificial Intelligence Institute. He received the prestigious award of Canada Research Chair in Statistical Learning Algorithms. Yoshua is known for his work on artificial neural networks and deep learning in the 1980s and 1990s. He co-created the prestigious International Conference on Learning Representations (ICLR) conference with Yann LeCun.
These are three of the top AI researchers, but there are 23 other world-class researchers that will also be presenting at GTC and potentially thousands more in attendance. Some of the leading companies presenting at GTC include: Adobe, Arm, Audi, Autodesk, Epic Games, Facebook, Google, IBM, Industrial Light and Magic, Morgan Stanley, Pixar, Red Hat, Siemens, St. Jude Children’s Hospital, Verizon, VMWare, and Walmart, covering a very broad range of AI applications and implementations. Nvidia was the first to invest heavily in High-Performance Computing (HPC) on GPUs, originally called GPGPU. Nvidia led using the CUDA platform to facilitate HPC on GPUs. That led to researchers using GPUs to accelerate digital neural networks, which led to breakthrough performance and modern AI. This pioneering work led to Nvidia’s GPUs becoming the main research and development platform for deep learning neural nets. With Nvidia GPUs in data centers, advanced driver assistance systems (ADAS), drones, robots, workstations, and PCs, Nvidia has the most pervasive and scalable AI platform. Since then, other GPU, accelerator, and software vendors have been chasing Nvidia. Being the first and widest platform means most researchers have experience using Nvidia GPUs, making GTC a conference with broad appeal in the AI, Automotive, HPC, and gaming industries. It also has strong appeal for a wide range of researchers in academia — atmospheric, biomedical, and geological, to name a few. Nvidia’s GTC really is one of the conferences I look forward to every year, where I learn about new uses for AI technology. The conference will begin with a keynote by Nvidia’s CEO Jensen Huang to reveal the latest Nvidia technology and designs on April 12. And did I mention it’s free? There are also instructor-led, hands-on training and workshops as part of the conference for $249 per person, but the main conference and talks you can attend for free.
 
1
+ The Industrial Revolution, often regarded as one of the most transformative periods in human history, commenced in Britain during the latter part of the 18th century and gradually spread across the globe. Its impact was profound and multifaceted, reshaping nearly every aspect of society, economy, and daily life. At its core was the rapid development and adoption of new technologies, such as steam power, mechanization, and the factory system. These innovations revolutionized traditional industries, enabling mass production on an unprecedented scale and significantly increasing efficiency and productivity. As a result, goods once crafted painstakingly by hand could now be manufactured swiftly and in large quantities, leading to an explosion in consumer markets and a surge in global trade. Concurrently, the revolution spurred urbanization as rural populations flocked to burgeoning industrial centers in search of employment opportunities, fundamentally altering the demographic landscape and giving rise to sprawling cities teeming with people and activity. However, the Industrial Revolution was not without its drawbacks and challenges. While it brought about undeniable economic growth and prosperity for many, it also engendered profound social inequalities and labor exploitation. Factory workers, including men, women, and children, toiled for long hours in hazardous conditions for meager wages, often subjected to harsh treatment by factory owners and overseers. Moreover, the relentless pursuit of profit and industrial expansion exacted a heavy toll on the environment, as factories spewed pollutants into the air and waterways, and deforestation accelerated to meet the insatiable demand for timber and resources. Despite these drawbacks, the Industrial Revolution fundamentally transformed the fabric of society and laid the groundwork for the modern world. 
It fueled technological innovation, spurred scientific inquiry, and catalyzed advancements in transportation, communication, and medicine. Moreover, it set the stage for the rise of capitalism and the development of modern economic systems, shaping the dynamics of global commerce and trade for centuries to come. Today, its legacy endures in the form of industrial infrastructure, urban landscapes, and societal norms, serving as a testament to humanity's capacity for innovation and adaptation in the face of profound change.