Spaces:

giswqs
/

scholar

Running

scholar / apps /h_index.py

Update st.cache

b1fc4cc almost 2 years ago

1.69 kB

	import dimcli
	import pandas as pd
	import sys
	import os
	import streamlit as st
	import scholarpy

	# Build the scholarpy DSL client only once per Streamlit session and keep
	# it in session state so that later reruns of the script reuse it.
	if "dsl" not in st.session_state:
	    st.session_state.dsl = scholarpy.Dsl()


	@st.cache_data
	def the_H_function(sorted_citations_list, n=1):
	    """Return the h-index for a list of citation counts.

	    The list must already be sorted in descending order. Walking it from
	    the front, the entry at 1-based position ``p`` counts as long as its
	    citation count is ``>= p``; the walk stops at the first entry that
	    fails this test.

	    Args:
	        sorted_citations_list: Citation counts, highest first. An empty
	            (or otherwise falsy) value yields ``n - 1``.
	        n: 1-based position assigned to the first list entry. Callers
	            normally leave this at its default of 1.

	    Returns:
	        The last position whose citation count was at least that position,
	        i.e. ``n - 1`` plus the number of leading entries that passed.

	    Examples:
	        >>> the_H_function([10, 8, 5, 4, 3])
	        4
	        >>> the_H_function([25, 8, 5, 3, 3])
	        3
	        >>> the_H_function([1000, 20])
	        2
	    """
	    # Iterate instead of recursing on list[1:]: no quadratic slicing, no
	    # RecursionError for large h-indexes, and no re-entry into a caching
	    # decorator on every level.
	    position = n
	    for citation_count in sorted_citations_list or ():
	        # Same comparison as the recursive form, so values that compare
	        # False (e.g. NaN) still end the walk.
	        if not citation_count >= position:
	            break
	        position += 1
	    return position - 1


	def dim_login(key=None, endpoint=None):
	    """Log in to Dimensions through dimcli and return a ``dimcli.Dsl`` client.

	    Args:
	        key: API key. When ``None``, the ``DIM_TOKEN`` environment variable
	            is used (which may itself be unset, giving ``None``).
	        endpoint: API endpoint URL. When ``None``,
	            ``"https://app.dimensions.ai"`` is used.

	    Returns:
	        The object returned by ``dimcli.Dsl()`` after a successful login.

	    Raises:
	        Exception: If ``dimcli.login`` or ``dimcli.Dsl`` raises; the
	            original error is attached as ``__cause__``.
	    """
	    # Fill in defaults on the parameters themselves so that explicitly
	    # passed values are honoured rather than leaving the names unbound.
	    if key is None:
	        key = os.environ.get("DIM_TOKEN")
	    if endpoint is None:
	        endpoint = "https://app.dimensions.ai"
	    try:
	        dimcli.login(key=key, endpoint=endpoint)
	        return dimcli.Dsl()
	    except Exception as err:
	        # Catch Exception (not a bare except) so KeyboardInterrupt and
	        # SystemExit propagate, and chain the cause for debugging.
	        raise Exception("Failed to login to Dimensions") from err


	@st.cache_data
	def get_pubs_df(dsl, researcher_id):
	    """Query the publications of one researcher and return them as a frame.

	    Args:
	        dsl: Client object exposing ``query(str)``; the query result must
	            provide ``as_dataframe()``. The app passes the object stored in
	            ``st.session_state["dsl"]``.
	        researcher_id: Researcher identifier placed inside the quoted
	            ``researchers.id`` filter of the query.

	    Returns:
	        Whatever ``as_dataframe()`` returns for the query result, with the
	        fields ``id``, ``title``, ``doi`` and ``times_cited`` requested,
	        sorted by ``times_cited`` and limited to 1000 records.
	    """
	    # The id ends up inside a double-quoted DSL string literal and may come
	    # straight from a text box: escape backslashes first, then quotes, so
	    # the value cannot terminate the literal or change the query.
	    # NOTE(review): assumes backslash-escaping is what the DSL expects.
	    safe_id = str(researcher_id).replace("\\", "\\\\").replace('"', '\\"')
	    q = """search publications where researchers.id = "{}" return publications[id+title+doi+times_cited] sort by times_cited limit 1000"""
	    pubs = dsl.query(q.format(safe_id))
	    return pubs.as_dataframe()


	@st.cache_data
	def get_citations(df):
	    """Return the ``times_cited`` column of *df* as a list.

	    Missing values in the column are replaced by 0. The order of the rows
	    is preserved.

	    Args:
	        df: DataFrame of publications, expected to have a ``times_cited``
	            column.

	    Returns:
	        A list with one entry per row, or an empty list when *df* has no
	        ``times_cited`` column.
	    """
	    # A frame without the column (for instance a completely empty frame)
	    # means there is nothing to count; return [] instead of raising
	    # KeyError.
	    if "times_cited" not in df:
	        return []
	    # Only this column is needed, so fill just it rather than the frame.
	    return list(df["times_cited"].fillna(0))


	def app():
	    """Render the H-index page.

	    Reads the DSL client from ``st.session_state["dsl"]``, asks for a
	    researcher ID, shows that researcher's publications as a table and
	    writes the H-index computed from their citation counts.
	    """
	    client = st.session_state["dsl"]
	    researcher_id = st.text_input("Enter researcher ID:", "ur.013632443777.66")
	    publications = get_pubs_df(client, researcher_id)
	    # Show the raw publication table before the derived figure.
	    st.dataframe(publications)
	    h_index = the_H_function(get_citations(publications))
	    st.write(f"H-index: {h_index}")