Eleonora Bernasconi committed on
Commit
755a5e3
·
1 Parent(s): 38d50e4
__pycache__/scholarly.cpython-37.pyc CHANGED
Binary files a/__pycache__/scholarly.cpython-37.pyc and b/__pycache__/scholarly.cpython-37.pyc differ
 
app.py CHANGED
@@ -17,27 +17,42 @@ st.write(data)
17
  cit_array = []
18
  count = 0
19
  st.write(count)
20
- if 'doi' not in data.columns:
21
- st.write("The 'doi' column does not exist in the CSV.")
22
- else:
23
- # Loop over DOIs and retrieve citation counts
24
- for index, row in data.iterrows():
25
- doi = row['doi']
 
 
 
 
26
  if doi:
27
- citation_count = scholarly.get_citation_count(doi)
28
- if citation_count != None:
29
- cit_array.append(citation_count)
30
- st.text(f"DOI: {doi}, Citation Count: {citation_count}")
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
31
  count += 1
32
- else:
33
- # Handle cases where DOI is None (e.g., bytitle lookup)
34
- title = row['title']
35
- doi_bytitle = scholarly.get_doi_from_title(str(title))
36
- citation_count_title = scholarly.get_citation_count(doi_bytitle)
37
- if citation_count_title != None:
38
- count += 1
39
- cit_array.append(citation_count_title)
40
  st.text(f"DOI from Title: {title}, Citation Count: {citation_count_title}")
 
 
41
 
42
  # Add the citation count column to the DataFrame
43
  data['Citation Count'] = cit_array
 
17
  cit_array = []
18
  count = 0
19
  st.write(count)
20
+
21
+ # Iterate over rows and update 'doi' column if necessary
22
+ for index, row in data.iterrows():
23
+ doi = row['doi']
24
+ title = row['title']
25
+
26
+ # If 'doi' is None, attempt to get DOI from title
27
+ if pd.isnull(doi):
28
+ doi = scholarly.get_doi_from_title(title)
29
+ # Update the DataFrame with the retrieved DOI
30
  if doi:
31
+ data.at[index, 'doi'] = doi
32
+
33
+ # Display the updated data table
34
+ st.write("Data with DOI")
35
+ st.write(data)
36
+
37
+ # Loop over DOIs and retrieve citation counts
38
+ for index, row in data.iterrows():
39
+ doi = row['doi']
40
+ if doi:
41
+ citation_count = scholarly.get_citation_count(doi)
42
+ if citation_count != None:
43
+ cit_array.append(citation_count)
44
+ st.text(f"DOI: {doi}, Citation Count: {citation_count}")
45
+ count += 1
46
+ else:
47
+ # Handle cases where DOI is None (e.g., bytitle lookup)
48
+ title = row['title']
49
+ doi_bytitle = scholarly.get_doi_from_title(str(title))
50
+ citation_count_title = scholarly.get_citation_count(doi_bytitle)
51
+ if citation_count_title != None:
52
  count += 1
 
 
 
 
 
 
 
 
53
  st.text(f"DOI from Title: {title}, Citation Count: {citation_count_title}")
54
+ cit_array.append(citation_count_title)
55
+
56
 
57
  # Add the citation count column to the DataFrame
58
  data['Citation Count'] = cit_array
filtered_data (1).csv ADDED
The diff for this file is too large to render. See raw diff
 
output/googleScholarcsv.csv ADDED
The diff for this file is too large to render. See raw diff
 
output/metrics_googleScholarcsv.csv ADDED
@@ -0,0 +1,2 @@
 
 
 
1
+ Query,Source,Papers,Citations,Years,Cites_Year,Cites_Paper,Cites_Author,Papers_Author,Authors_Paper,h_index,g_index,hc_index,hI_index,hI_norm,AWCR,AW_index,AWCRpA,e_index,hm_index,QueryDate,Cites_Author_Year,hI_annual,h_coverage,g_coverage,star_count,year_first,year_last,ECC,acc1,acc2,acc5,acc20,hA
2
+ "IRCDL","Google Scholar",298,1282,18,71.22,4.30,489.08,129.46,3.08,16,24,12,5.22,9,222.13,14.90,86.80,14.97,9.80,"2023-09-15 16:29:59",27.17,0.50,37.4,45.8,1,2005,2023,1282,83,32,6,0,5
output/metrics_semanticscholarcsv.csv ADDED
@@ -0,0 +1,2 @@
 
 
 
1
+ Query,Source,Papers,Citations,Years,Cites_Year,Cites_Paper,Cites_Author,Papers_Author,Authors_Paper,h_index,g_index,hc_index,hI_index,hI_norm,AWCR,AW_index,AWCRpA,e_index,hm_index,QueryDate,Cites_Author_Year,hI_annual,h_coverage,g_coverage,star_count,year_first,year_last,ECC,acc1,acc2,acc5,acc20,hA
2
+ "IRCDL","Semantic Scholar",227,433,17,25.47,1.91,159.09,93.77,3.11,9,11,9,2.45,5,85.76,9.26,30.32,5.92,6.04,"2023-09-15 16:36:15",9.35,0.29,26.8,30.9,0,2006,2023,433,26,10,1,0,3
output/output_crawled_data.csv ADDED
The diff for this file is too large to render. See raw diff
 
output/semanticscholarcsv.csv ADDED
The diff for this file is too large to render. See raw diff
 
scholarly.py CHANGED
@@ -20,7 +20,7 @@ def get_doi_from_title(title):
20
  similarity_score = fuzz.ratio(title.lower(), retrieved_title) / 100 # Calcola il punteggio di similarità
21
  # soglia di similarità desiderata (75%)
22
  similarity_threshold = 0.75
23
- print(retrieved_title, similarity_score)
24
  if similarity_score >= similarity_threshold:
25
  # pdb.set_trace()
26
  return item.get('DOI', None)
 
20
  similarity_score = fuzz.ratio(title.lower(), retrieved_title) / 100 # Calcola il punteggio di similarità
21
  # soglia di similarità desiderata (75%)
22
  similarity_threshold = 0.75
23
+ # print(retrieved_title, similarity_score)
24
  if similarity_score >= similarity_threshold:
25
  # pdb.set_trace()
26
  return item.get('DOI', None)