Spaces:
Runtime error
Runtime error
Eleonora Bernasconi committed on
Commit ·
755a5e3
1
Parent(s): 38d50e4
update
Browse files
- __pycache__/scholarly.cpython-37.pyc +0 -0
- app.py +33 -18
- filtered_data (1).csv +0 -0
- output/googleScholarcsv.csv +0 -0
- output/metrics_googleScholarcsv.csv +2 -0
- output/metrics_semanticscholarcsv.csv +2 -0
- output/output_crawled_data.csv +0 -0
- output/semanticscholarcsv.csv +0 -0
- scholarly.py +1 -1
__pycache__/scholarly.cpython-37.pyc
CHANGED
|
Binary files a/__pycache__/scholarly.cpython-37.pyc and b/__pycache__/scholarly.cpython-37.pyc differ
|
|
|
app.py
CHANGED
|
@@ -17,27 +17,42 @@ st.write(data)
|
|
| 17 |
cit_array = []
|
| 18 |
count = 0
|
| 19 |
st.write(count)
|
| 20 |
-
|
| 21 |
-
|
| 22 |
-
|
| 23 |
-
|
| 24 |
-
|
| 25 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
| 26 |
if doi:
|
| 27 |
-
|
| 28 |
-
|
| 29 |
-
|
| 30 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 31 |
count += 1
|
| 32 |
-
else:
|
| 33 |
-
# Handle cases where DOI is None (e.g., bytitle lookup)
|
| 34 |
-
title = row['title']
|
| 35 |
-
doi_bytitle = scholarly.get_doi_from_title(str(title))
|
| 36 |
-
citation_count_title = scholarly.get_citation_count(doi_bytitle)
|
| 37 |
-
if citation_count_title != None:
|
| 38 |
-
count += 1
|
| 39 |
-
cit_array.append(citation_count_title)
|
| 40 |
st.text(f"DOI from Title: {title}, Citation Count: {citation_count_title}")
|
|
|
|
|
|
|
| 41 |
|
| 42 |
# Add the citation count column to the DataFrame
|
| 43 |
data['Citation Count'] = cit_array
|
|
|
|
| 17 |
cit_array = []
|
| 18 |
count = 0
|
| 19 |
st.write(count)
|
| 20 |
+
|
| 21 |
+
# Iterate over rows and update 'doi' column if necessary
|
| 22 |
+
for index, row in data.iterrows():
|
| 23 |
+
doi = row['doi']
|
| 24 |
+
title = row['title']
|
| 25 |
+
|
| 26 |
+
# If 'doi' is None, attempt to get DOI from title
|
| 27 |
+
if pd.isnull(doi):
|
| 28 |
+
doi = scholarly.get_doi_from_title(title)
|
| 29 |
+
# Update the DataFrame with the retrieved DOI
|
| 30 |
if doi:
|
| 31 |
+
data.at[index, 'doi'] = doi
|
| 32 |
+
|
| 33 |
+
# Display the updated data table
|
| 34 |
+
st.write("Data with DOI")
|
| 35 |
+
st.write(data)
|
| 36 |
+
|
| 37 |
+
# Loop over DOIs and retrieve citation counts
|
| 38 |
+
for index, row in data.iterrows():
|
| 39 |
+
doi = row['doi']
|
| 40 |
+
if doi:
|
| 41 |
+
citation_count = scholarly.get_citation_count(doi)
|
| 42 |
+
if citation_count != None:
|
| 43 |
+
cit_array.append(citation_count)
|
| 44 |
+
st.text(f"DOI: {doi}, Citation Count: {citation_count}")
|
| 45 |
+
count += 1
|
| 46 |
+
else:
|
| 47 |
+
# Handle cases where DOI is None (e.g., bytitle lookup)
|
| 48 |
+
title = row['title']
|
| 49 |
+
doi_bytitle = scholarly.get_doi_from_title(str(title))
|
| 50 |
+
citation_count_title = scholarly.get_citation_count(doi_bytitle)
|
| 51 |
+
if citation_count_title != None:
|
| 52 |
count += 1
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 53 |
st.text(f"DOI from Title: {title}, Citation Count: {citation_count_title}")
|
| 54 |
+
cit_array.append(citation_count_title)
|
| 55 |
+
|
| 56 |
|
| 57 |
# Add the citation count column to the DataFrame
|
| 58 |
data['Citation Count'] = cit_array
|
filtered_data (1).csv
ADDED
|
The diff for this file is too large to render.
See raw diff
|
|
|
output/googleScholarcsv.csv
ADDED
|
The diff for this file is too large to render.
See raw diff
|
|
|
output/metrics_googleScholarcsv.csv
ADDED
|
@@ -0,0 +1,2 @@
|
|
|
|
|
|
|
|
|
|
| 1 |
+
Query,Source,Papers,Citations,Years,Cites_Year,Cites_Paper,Cites_Author,Papers_Author,Authors_Paper,h_index,g_index,hc_index,hI_index,hI_norm,AWCR,AW_index,AWCRpA,e_index,hm_index,QueryDate,Cites_Author_Year,hI_annual,h_coverage,g_coverage,star_count,year_first,year_last,ECC,acc1,acc2,acc5,acc20,hA
|
| 2 |
+
"IRCDL","Google Scholar",298,1282,18,71.22,4.30,489.08,129.46,3.08,16,24,12,5.22,9,222.13,14.90,86.80,14.97,9.80,"2023-09-15 16:29:59",27.17,0.50,37.4,45.8,1,2005,2023,1282,83,32,6,0,5
|
output/metrics_semanticscholarcsv.csv
ADDED
|
@@ -0,0 +1,2 @@
|
|
|
|
|
|
|
|
|
|
| 1 |
+
Query,Source,Papers,Citations,Years,Cites_Year,Cites_Paper,Cites_Author,Papers_Author,Authors_Paper,h_index,g_index,hc_index,hI_index,hI_norm,AWCR,AW_index,AWCRpA,e_index,hm_index,QueryDate,Cites_Author_Year,hI_annual,h_coverage,g_coverage,star_count,year_first,year_last,ECC,acc1,acc2,acc5,acc20,hA
|
| 2 |
+
"IRCDL","Semantic Scholar",227,433,17,25.47,1.91,159.09,93.77,3.11,9,11,9,2.45,5,85.76,9.26,30.32,5.92,6.04,"2023-09-15 16:36:15",9.35,0.29,26.8,30.9,0,2006,2023,433,26,10,1,0,3
|
output/output_crawled_data.csv
ADDED
|
The diff for this file is too large to render.
See raw diff
|
|
|
output/semanticscholarcsv.csv
ADDED
|
The diff for this file is too large to render.
See raw diff
|
|
|
scholarly.py
CHANGED
|
@@ -20,7 +20,7 @@ def get_doi_from_title(title):
|
|
| 20 |
similarity_score = fuzz.ratio(title.lower(), retrieved_title) / 100 # Calcola il punteggio di similarità
|
| 21 |
# soglia di similarità desiderata (75%)
|
| 22 |
similarity_threshold = 0.75
|
| 23 |
-
print(retrieved_title, similarity_score)
|
| 24 |
if similarity_score >= similarity_threshold:
|
| 25 |
# pdb.set_trace()
|
| 26 |
return item.get('DOI', None)
|
|
|
|
| 20 |
similarity_score = fuzz.ratio(title.lower(), retrieved_title) / 100 # Calcola il punteggio di similarità
|
| 21 |
# soglia di similarità desiderata (75%)
|
| 22 |
similarity_threshold = 0.75
|
| 23 |
+
# print(retrieved_title, similarity_score)
|
| 24 |
if similarity_score >= similarity_threshold:
|
| 25 |
# pdb.set_trace()
|
| 26 |
return item.get('DOI', None)
|