Spaces:
Sleeping
Sleeping
Update app.py
Browse files
app.py
CHANGED
|
@@ -96,33 +96,37 @@ def parse_to_markdown(text):
|
|
| 96 |
import re
|
| 97 |
|
| 98 |
def extract_urls(text):
|
| 99 |
-
|
| 100 |
-
|
| 101 |
-
|
| 102 |
-
|
| 103 |
-
|
| 104 |
-
|
| 105 |
-
|
| 106 |
-
|
| 107 |
-
|
| 108 |
-
|
| 109 |
-
|
| 110 |
-
|
| 111 |
-
|
| 112 |
-
|
| 113 |
-
|
| 114 |
-
|
| 115 |
-
|
| 116 |
-
|
| 117 |
-
|
| 118 |
-
|
| 119 |
-
|
| 120 |
-
|
| 121 |
-
|
| 122 |
-
|
| 123 |
-
|
| 124 |
-
|
| 125 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
| 126 |
|
| 127 |
|
| 128 |
|
|
|
|
| 96 |
import re
|
| 97 |
|
| 98 |
# Patterns are compiled once at import time instead of on every call.
# Each one is searched independently over the whole input text.
_DATE_PATTERN = re.compile(r'### (\d{2} \w{3} \d{4})')
_ABS_LINK_PATTERN = re.compile(r'\[(.*?)\]\((https://arxiv\.org/abs/\d+\.\d+)\)')
_PDF_LINK_PATTERN = re.compile(r'\[⬇️\]\((https://arxiv\.org/pdf/\d+\.\d+)\)')
_TITLE_PATTERN = re.compile(r'### \d{2} \w{3} \d{4} \| \[(.*?)\]')


def extract_urls(text):
    """Build a markdown summary of the arXiv entries found in *text*.

    The text is scanned for four kinds of fragments:

    * a date heading of the form ``### DD Mon YYYY``,
    * a title in square brackets following ``### DD Mon YYYY | ``,
    * a markdown link to ``https://arxiv.org/abs/<id>``,
    * a ``[⬇️]`` markdown link to ``https://arxiv.org/pdf/<id>``.

    The i-th match of each kind is assumed to belong to the i-th entry
    (the fragments are matched independently, not per line).

    Args:
        text: The source string to scan.

    Returns:
        One markdown section per date heading (date, title, abstract link,
        PDF link, followed by a ``---`` rule), concatenated into a single
        string.  Returns ``''`` when no date heading is present.  When
        *text* is not a string, or when there are fewer titles / abstract
        links / PDF links than date headings, a ``'.'`` is written to the
        Streamlit page and ``''`` is returned.
    """
    # Explicit input check instead of relying on a bare ``except`` to catch
    # the TypeError that ``findall`` raises on non-string input.
    if not isinstance(text, str):
        st.write('.')
        return ''

    dates = _DATE_PATTERN.findall(text)
    titles = _TITLE_PATTERN.findall(text)
    # The abs-link pattern has two groups (link text, URL); only the URL is used.
    abs_links = [url for _, url in _ABS_LINK_PATTERN.findall(text)]
    pdf_links = _PDF_LINK_PATTERN.findall(text)

    # Every date heading needs a matching title and pair of links; otherwise
    # the entries cannot be assembled and the whole result is discarded.
    if min(len(titles), len(abs_links), len(pdf_links)) < len(dates):
        st.write('.')
        return ''

    # zip() stops at the shortest sequence, which is ``dates`` after the
    # check above, so surplus links without a heading are ignored.
    sections = [
        f"**Date:** {date}\n\n"
        f"**Title:** {title}\n\n"
        f"**Abstract Link:** [{abs_link}]({abs_link})\n\n"
        f"**PDF Link:** [{pdf_link}]({pdf_link})\n\n"
        "---\n\n"
        for date, title, abs_link, pdf_link in zip(dates, titles, abs_links, pdf_links)
    ]
    return "".join(sections)
|
| 130 |
|
| 131 |
|
| 132 |
|