Spaces:
Build error
Build error
Commit ·
99f1718
1
Parent(s): 0d5b03d
zz
Browse files
- app.py +8 -1
- askURL.py +4 -3
- processAgain.py +1 -1
app.py
CHANGED
|
@@ -107,10 +107,17 @@ with gr.Blocks() as demo:
|
|
| 107 |
|
| 108 |
with gr.Row():
|
| 109 |
with gr.Column():
|
| 110 |
-
btn_AttachArticles = gr.Button("
|
| 111 |
with gr.Column():
|
| 112 |
file_AttachedSearchResults = gr.File(label="🗃️ [Cleaned Search Matrix Results Including Articles]", file_types=['.tsv'], height=85)
|
| 113 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 114 |
with gr.Row():
|
| 115 |
btn_sendToAIInterrogationTab = gr.Button('Send to AI Interrogation Tab', variant='primary')
|
| 116 |
|
|
|
|
| 107 |
|
| 108 |
with gr.Row():
|
| 109 |
with gr.Column():
|
| 110 |
+
btn_AttachArticles = gr.Button("🐌 Include [Processed Search Matrix Results]")
|
| 111 |
with gr.Column():
|
| 112 |
file_AttachedSearchResults = gr.File(label="🗃️ [Cleaned Search Matrix Results Including Articles]", file_types=['.tsv'], height=85)
|
| 113 |
|
| 114 |
+
# tokenizer
|
| 115 |
+
with gr.Row():
|
| 116 |
+
with gr.Column():
|
| 117 |
+
gr.Button('')
|
| 118 |
+
with gr.Column():
|
| 119 |
+
gr.File('')
|
| 120 |
+
|
| 121 |
with gr.Row():
|
| 122 |
btn_sendToAIInterrogationTab = gr.Button('Send to AI Interrogation Tab', variant='primary')
|
| 123 |
|
askURL.py
CHANGED
|
@@ -25,9 +25,8 @@ def get_token_length(text):
|
|
| 25 |
|
| 26 |
return token_length
|
| 27 |
|
| 28 |
-
|
| 29 |
def get_main_text_from_url(url):
|
| 30 |
-
print('get_main_text_from_url...')
|
| 31 |
# Define the fake header
|
| 32 |
headers = {
|
| 33 |
'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.124 Safari/537.36'
|
|
@@ -48,10 +47,12 @@ def get_main_text_from_url(url):
|
|
| 48 |
if main_text_element is None:
|
| 49 |
main_text_element = soup.find('div', {'class': 'content-area'}) # tri-cities dispatch
|
| 50 |
if main_text_element is None:
|
| 51 |
-
main_text_element = soup.find('article')
|
| 52 |
if main_text_element is None:
|
| 53 |
main_text_element = soup.find('body')
|
| 54 |
|
|
|
|
|
|
|
| 55 |
if main_text_element:
|
| 56 |
main_text = main_text_element.text
|
| 57 |
# print(main_text)
|
|
|
|
| 25 |
|
| 26 |
return token_length
|
| 27 |
|
|
|
|
| 28 |
def get_main_text_from_url(url):
|
| 29 |
+
# print('get_main_text_from_url...')
|
| 30 |
# Define the fake header
|
| 31 |
headers = {
|
| 32 |
'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.124 Safari/537.36'
|
|
|
|
| 47 |
if main_text_element is None:
|
| 48 |
main_text_element = soup.find('div', {'class': 'content-area'}) # tri-cities dispatch
|
| 49 |
if main_text_element is None:
|
| 50 |
+
main_text_element = soup.find('article', {'class': 'article-content-story article-content-story--story'})
|
| 51 |
if main_text_element is None:
|
| 52 |
main_text_element = soup.find('body')
|
| 53 |
|
| 54 |
+
|
| 55 |
+
|
| 56 |
if main_text_element:
|
| 57 |
main_text = main_text_element.text
|
| 58 |
# print(main_text)
|
processAgain.py
CHANGED
|
@@ -70,7 +70,7 @@ def attach_articles(file):
|
|
| 70 |
|
| 71 |
output_fname = 'output_2zzz.tsv'
|
| 72 |
|
| 73 |
-
|
| 74 |
with open(output_fname, 'w') as output_file:
|
| 75 |
# Define the fieldnames for the output file
|
| 76 |
fieldnames = list(reader.fieldnames) + ["Content"]
|
|
|
|
| 70 |
|
| 71 |
output_fname = 'output_2zzz.tsv'
|
| 72 |
|
| 73 |
+
# Write the processed data to a new file
|
| 74 |
with open(output_fname, 'w') as output_file:
|
| 75 |
# Define the fieldnames for the output file
|
| 76 |
fieldnames = list(reader.fieldnames) + ["Content"]
|