raannakasturi committed on
Commit
41c53ca
·
verified ·
1 Parent(s): b0deb3d

Upload 2 files

Browse files
Files changed (2) hide show
  1. app.py +81 -0
  2. requiremets.txt +3 -0
app.py ADDED
@@ -0,0 +1,81 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import json
2
+ import os
3
+ import pdf2doi
4
+ import gradio as gr
5
+ import requests
6
+ import html
7
+
8
def download_pdf(url, timeout=30):
    """Download the PDF at *url* into the working directory and return its path.

    The local file name is the last non-empty segment of the URL's path with
    ".pdf" appended. The query string and fragment are ignored so they cannot
    leak into the file name. Two URLs that share a last path segment map to
    the same local file, so concurrent downloads of such URLs overwrite each
    other.

    Args:
        url: Address of the PDF to fetch.
        timeout: Seconds to wait for the server before giving up.

    Returns:
        Relative path of the file that was written.

    Raises:
        requests.RequestException: On connection failure, timeout, or a
            non-success HTTP status.
    """
    from urllib.parse import urlsplit  # local import keeps the block self-contained

    # Only the path component names the document; fall back to a fixed stem
    # when the URL ends in "/" or has no path at all.
    stem = urlsplit(url).path.rstrip("/").split("/")[-1] or "download"
    file_path = f"{stem}.pdf"

    response = requests.get(url, timeout=timeout)
    # Fail here instead of saving an HTML error page under a .pdf name.
    response.raise_for_status()

    with open(file_path, "wb") as file:
        file.write(response.content)
    return file_path
14
+
15
def get_doi(pdf_path):
    """Extract the DOI, title and URL of the paper stored at *pdf_path*.

    Runs pdf2doi on the file and reads the "DOI", "title" and "URL" keys from
    the JSON document found under the result's "validation_info" entry.

    Args:
        pdf_path: Path of the PDF file to inspect.

    Returns:
        A ``(doi, title, url)`` tuple. Each element is ``None`` when the
        corresponding key is absent. All three are ``None`` when pdf2doi
        yields no usable JSON validation data.
    """
    not_found = (None, None, None)

    pdf2doi.config.set('verbose', False)
    results = pdf2doi.pdf2doi(pdf_path)

    # NOTE(review): pdf2doi appears to return a dict whose 'validation_info'
    # is a JSON string only for validated DOIs; other outcomes (nothing found,
    # arXiv identifiers, validation disabled) presumably give None or
    # non-JSON values - confirm against the pdf2doi documentation.
    if not isinstance(results, dict):
        return not_found
    raw_info = results.get('validation_info')
    if not isinstance(raw_info, (str, bytes, bytearray)):
        return not_found

    try:
        validation_info = json.loads(raw_info)
    except ValueError:  # json.JSONDecodeError is a ValueError subclass
        return not_found
    if not isinstance(validation_info, dict):
        return not_found

    return (
        validation_info.get('DOI'),
        validation_info.get('title'),
        validation_info.get('URL'),
    )
23
+
24
def get_paper_data(doi, timeout=30):
    """Fetch the CiteAs record for *doi*.

    Args:
        doi: DOI to look up; it is percent-encoded before being placed in the
            request path.
        timeout: Seconds to wait for the API before giving up.

    Returns:
        The decoded JSON object returned by the API, or an empty dict when
        the request fails or the reply is not a JSON object.
    """
    from urllib.parse import quote  # local import keeps the block self-contained

    # DOIs may contain characters such as "#" or "?" that would otherwise be
    # read as a fragment or query; "/" stays literal as the DOI separator.
    api_url = f"https://api.citeas.org/product/{quote(str(doi), safe='/')}"

    try:
        response = requests.get(api_url, timeout=timeout)
        payload = response.json()
    except (requests.RequestException, ValueError):
        # Network failure or non-JSON body: report "no data" to the caller.
        return {}

    return payload if isinstance(payload, dict) else {}
28
+
29
def _clean_citation(raw_citation):
    """Strip <i> tags, flatten newlines and ellipses, then unescape HTML entities."""
    flattened = (
        raw_citation.replace("<i>", "")
        .replace("</i>", "")
        .replace("\u2026", "...")
        .replace("\n", " ")
    )
    return html.unescape(flattened)


def main(pdf_url):
    """Resolve a PDF URL to its DOI, title, APA citation and landing URL.

    Downloads the PDF, extracts its DOI, looks the DOI up on CiteAs and
    assembles the result.

    Args:
        pdf_url: URL of the PDF to analyse.

    Returns:
        A JSON string, indented by four spaces. On success it has the keys
        "doi", "title", "citation_text" and "url"; otherwise it has a single
        "error" key describing what was missing.
    """
    pdf_url = (pdf_url or "").strip()
    if not pdf_url:
        return json.dumps({"error": "PDF URL is required"}, indent=4)

    pdf_path = download_pdf(pdf_url)
    try:
        doi, title, url = get_doi(pdf_path)
    finally:
        # The file is only needed for DOI extraction, so remove it on every
        # path (success, early return, exception) rather than only at the end.
        try:
            os.remove(pdf_path)
        except FileNotFoundError:
            pass  # already gone; nothing left to clean up

    if doi is None:
        return json.dumps({"error": "DOI not found"}, indent=4)

    paper_data = get_paper_data(doi)
    if not paper_data:
        return json.dumps({"error": "Paper data not found"}, indent=4)

    # First APA-style entry wins; a missing or null "citations" means none.
    citation_text = next(
        (
            entry.get('citation')
            for entry in paper_data.get('citations') or []
            if entry.get('style_shortname') == 'apa'
        ),
        None,
    )

    data = {
        "doi": doi,
        "title": title or paper_data.get('name') or "Title not found",
        "citation_text": (
            _clean_citation(citation_text) if citation_text else "Citation not found"
        ),
        "url": url or f"https://doi.org/{doi}",
    }
    return json.dumps(data, ensure_ascii=False, indent=4)
59
+
60
+
61
# Soft Gradio theme: purple/cyan/slate hues, with Syne as the first font
# followed by three Poppins entries.
theme = gr.themes.Soft(
    primary_hue="purple",
    secondary_hue="cyan",
    neutral_hue="slate",
    font=[
        gr.themes.GoogleFont(font_name)
        for font_name in ("Syne", "Poppins", "Poppins", "Poppins")
    ],
)
72
+
73
# UI: a URL input, a read-only style output box with a copy button, and a
# button that runs main() on the entered URL.
# NOTE(review): nesting below was reconstructed from a flattened diff view -
# confirm which components belong inside the Row.
with gr.Blocks(theme=theme) as app:
    with gr.Row():
        pdf_path = gr.Textbox(
            label="PDF URL",
            placeholder="Enter the URL of the PDF",
            lines=1,
        )
        doi_data = gr.Textbox(
            label="DOI Data",
            placeholder="DOI data will be displayed here",
            lines=7,
            show_copy_button=True,
        )
        get_data = gr.Button(value="Get DOI Data", variant="primary")

    # Also exposed through the API under the name "getDOIData".
    get_data.click(
        fn=main,
        inputs=[pdf_path],
        outputs=[doi_data],
        api_name="getDOIData",
    )

# queue() returns the Blocks instance, so configuring and launching in two
# statements is the same as the chained call.
app.queue(default_concurrency_limit=250)
app.launch()
requiremets.txt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ pdf2doi
2
+ gradio
3
+ requests