Spaces:

peter2000
/

policy_test

Runtime error

App Files Files Community

policy_test / app.py

peter2000

Update app.py

d162bf7 over 3 years ago

raw

history blame

5.1 kB

	import streamlit as st

	# Streamlit requires set_page_config to be the first Streamlit command of
	# the script, which is why it runs before the remaining imports.
	st.set_page_config('SDSN x GIZ Policy Tracing', layout="wide")

	import seaborn as sns
	import pdfplumber
	from pandas import DataFrame
	import matplotlib.pyplot as plt
	import numpy as np
	import streamlit as st
	# The PyPI distribution is called "sentence-transformers", but the module is
	# imported with an underscore; a hyphen in an import is a SyntaxError.
	import sentence_transformers



	##@st.cache(allow_output_mutation=True)
	def load_model():
	    """Create a KeyBERT instance with its default arguments and return it."""
	    model = KeyBERT()
	    return model

	def read_(file):
	    """Extract the text of a PDF and show basic document info in the app.

	    Writes the PDF metadata and the page count to the Streamlit page as a
	    side effect.

	    Args:
	        file: Object handed to ``pdfplumber.open``, or ``None`` when no
	            document is available.

	    Returns:
	        The text of all pages joined by single spaces, or ``None`` when
	        ``file`` is ``None``.
	    """
	    if file is None:
	        return None

	    with pdfplumber.open(file) as pdf:
	        # Depending on the pdfplumber version, extract_text() may return
	        # None for a page without extractable text; substitute '' so the
	        # join below cannot raise TypeError.
	        page_texts = [page.extract_text() or '' for page in pdf.pages]
	        st.write('Document:', pdf.metadata)
	        st.write('Number of pages:', len(pdf.pages))
	    # No explicit pdf.close(): leaving the with-block closes the document.
	    return ' '.join(page_texts)



	# Sidebar, top to bottom: logo, upload step, options title, chapter selector.
	with st.sidebar:
	    st.image(
	        "https://github.com/gizdatalab/policy_tracing/blob/main/img/sdsn.png?raw=true",
	        use_column_width=True,
	    )
	    st.markdown("## 📌 Step One: Upload document ")

	    # Upload widget restricted to PDF files; its result is read later by read_().
	    file = st.file_uploader('Upload PDF File', type=['pdf'])

	    st.title("Options:")
	    st.markdown(
	        "You can freely browse the different chapters - ie example prompts from different people - and see the results."
	    )

	    # NOTE: despite the variable name, this holds the selected chapter label.
	    selected_date = st.selectbox(
	        "Please select the chapter you want to read:",
	        ['c1', 'c2'],
	    )

	# Centered page title followed by two blank spacer lines.
	with st.container():
	    st.markdown("<h1 style='text-align: center; color: black;'> SDSN X GIZ - Policy Action Tracking</h1>", unsafe_allow_html=True)
	    st.write(' ')
	    st.write(' ')

	# Collapsible description of the app, expanded by default.
	with st.expander("ℹ️ - About this app", expanded=True):

	    # The Markdown link must not have a space between "]" and "(",
	    # otherwise it is rendered as plain text instead of a hyperlink.
	    st.write(
	        """
	The Policy Action Tracker app is an easy-to-use interface built with Streamlit for analyzing policy documents - developed by GIZ Data and the Sustainable Development Solution Network.

	It uses a minimal keyword extraction technique that leverages multiple NLP embeddings and relies on [Transformers](https://huggingface.co/transformers/) 🤗 to create keywords/keyphrases that are most similar to a document.
	"""
	    )

	    st.markdown("")

	st.markdown("")


	with st.container():
	    st.markdown("## 📌 Step One: Upload document ")
	    # Text of the PDF chosen in the sidebar uploader (None if nothing uploaded).
	    text_str = read_(file)

	# read_() returns None while no PDF has been uploaded. The analysis below
	# needs real text, so end this script run here instead of crashing.
	if text_str is None:
	    st.info("Please upload a PDF document in the sidebar to start the analysis.")
	    st.stop()


	import seaborn as sns
	import pdfplumber
	from pandas import DataFrame
	from keybert import KeyBERT
	import matplotlib.pyplot as plt
	import numpy as np
	import streamlit as st



	@st.cache(allow_output_mutation=True)
	def load_model():
	    """Return a KeyBERT instance built with default arguments.

	    The function is wrapped in ``st.cache`` with
	    ``allow_output_mutation=True``, so the result is handled by Streamlit's
	    cache rather than rebuilt by this function on every call.
	    """
	    keyword_model = KeyBERT()
	    return keyword_model

	# Keyword model as provided by load_model().
	kw_model = load_model()

	# Ask for the top 10 one- or two-word keyphrases, with MMR diversification
	# and English stop words filtered out.
	keywords = kw_model.extract_keywords(
	    text_str,
	    keyphrase_ngram_range=(1, 2),
	    use_mmr=True,
	    stop_words="english",
	    top_n=10,
	    diversity=0.7,
	)

	st.markdown("## 🎈 What is my document about?")

	# Tabulate the (keyword, score) pairs, highest score first.
	df = DataFrame(keywords, columns=["Keyword/Keyphrase", "Relevancy"])
	df = df.sort_values(by="Relevancy", ascending=False)
	df = df.reset_index(drop=True)

	# Number the rows from 1 instead of 0 for display.
	df.index += 1

	# Add styling
	cmGreen = sns.light_palette("green", as_cmap=True)
	cmRed = sns.light_palette("red", as_cmap=True)
	format_dictionary = {
	    "Relevancy": "{:.1%}",
	}

	# Three columns; only the wide middle one is used, which centers the table.
	c1, c2, c3 = st.columns([1, 3, 1])

	# From here on ``df`` is a pandas Styler: green gradient on the score
	# column, scores rendered as percentages with one decimal.
	df = (
	    df.style
	    .background_gradient(cmap=cmGreen, subset=["Relevancy"])
	    .format(format_dictionary)
	)

	with c2:
	    st.table(df)

	######## SDG!
	from transformers import pipeline

	# Text-classification pipeline built from the fine-tuned SDG checkpoint.
	finetuned_checkpoint = "jonas/sdg_classifier_osdg"
	classifier = pipeline("text-classification", model=finetuned_checkpoint)

	# Cut the document into consecutive, non-overlapping windows of par_len
	# words; each window is classified as one "paragraph".
	word_list = text_str.split()
	len_word_list = len(word_list)
	par_len = 130
	# NOTE(review): only full windows are produced, so up to par_len - 1
	# trailing words (and any document shorter than par_len words) are not
	# classified. Confirm whether that is intended.
	par_list = [
	    ' '.join(word_list[i * par_len:(i + 1) * par_len])
	    for i in range(len_word_list // par_len)
	]

	# One prediction per paragraph; keep its label and score.
	labels = classifier(par_list)
	labels_ = [(result['label'], result['score']) for result in labels]
	df = DataFrame(labels_, columns=["SDG", "Relevancy"])
	df['text'] = ['... ' + par + ' ...' for par in par_list]
	df = df.sort_values(by="Relevancy", ascending=False).reset_index(drop=True)
	# Number the rows from 1 instead of 0 for display.
	df.index += 1
	# Keep only predictions with a score above 0.9.
	df = df[df['Relevancy'] > .9]
	# Number of retained paragraphs per SDG label (feeds the pie chart).
	x = df['SDG'].value_counts()

	# Global matplotlib font size used for the chart labels.
	plt.rcParams['font.size'] = 25

	# One shade of the 'Blues' colormap per SDG label, sampled evenly
	# between 0.2 and 0.7.
	blues = plt.get_cmap('Blues')
	colors = blues(np.linspace(0.2, 0.7, len(x)))

	# plot
	fig, ax = plt.subplots()
	ax.pie(
	    x,
	    colors=colors,
	    radius=2,
	    center=(4, 4),
	    wedgeprops={"linewidth": 1, "edgecolor": "white"},
	    frame=False,
	    labels=list(x.index),
	)

	st.markdown("## 🎈 Anything related to SDGs?")

	# Pie chart goes in the left column, the table in the wider middle column;
	# the narrow third column stays empty.
	c4, c5, c6 = st.columns([5, 7, 1])

	# Add styling
	cmGreen = sns.light_palette("green", as_cmap=True)
	cmRed = sns.light_palette("red", as_cmap=True)
	format_dictionary = {
	    "Relevancy": "{:.1%}",
	}

	# From here on ``df`` is a pandas Styler: green gradient on the score
	# column, scores rendered as percentages with one decimal.
	df = (
	    df.style
	    .background_gradient(cmap=cmGreen, subset=["Relevancy"])
	    .format(format_dictionary)
	)

	with c4:
	    st.pyplot(fig)

	with c5:
	    st.table(df)