WhaleCancer committed on
Commit
99f1718
·
1 Parent(s): 0d5b03d
Files changed (3) hide show
  1. app.py +8 -1
  2. askURL.py +4 -3
  3. processAgain.py +1 -1
app.py CHANGED
@@ -107,10 +107,17 @@ with gr.Blocks() as demo:
107
 
108
  with gr.Row():
109
  with gr.Column():
110
- btn_AttachArticles = gr.Button("🐢 Include Articles to [Cleaned Search Matrix Results]")
111
  with gr.Column():
112
  file_AttachedSearchResults = gr.File(label="🗃️ [Cleaned Search Matrix Results Including Articles]", file_types=['.tsv'], height=85)
113
 
 
 
 
 
 
 
 
114
  with gr.Row():
115
  btn_sendToAIInterrogationTab = gr.Button('Send to AI Interrogation Tab', variant='primary')
116
 
 
107
 
108
  with gr.Row():
109
  with gr.Column():
110
+ btn_AttachArticles = gr.Button("🐌 Include [Processed Search Matrix Results]")
111
  with gr.Column():
112
  file_AttachedSearchResults = gr.File(label="🗃️ [Cleaned Search Matrix Results Including Articles]", file_types=['.tsv'], height=85)
113
 
114
+ # tokenizer
115
+ with gr.Row():
116
+ with gr.Column():
117
+ gr.Button('')
118
+ with gr.Column():
119
+ gr.File('')
120
+
121
  with gr.Row():
122
  btn_sendToAIInterrogationTab = gr.Button('Send to AI Interrogation Tab', variant='primary')
123
 
askURL.py CHANGED
@@ -25,9 +25,8 @@ def get_token_length(text):
25
 
26
  return token_length
27
 
28
-
29
  def get_main_text_from_url(url):
30
- print('get_main_text_from_url...')
31
  # Define the fake header
32
  headers = {
33
  'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.124 Safari/537.36'
@@ -48,10 +47,12 @@ def get_main_text_from_url(url):
48
  if main_text_element is None:
49
  main_text_element = soup.find('div', {'class': 'content-area'}) # tri-cities dispatch
50
  if main_text_element is None:
51
- main_text_element = soup.find('article')
52
  if main_text_element is None:
53
  main_text_element = soup.find('body')
54
 
 
 
55
  if main_text_element:
56
  main_text = main_text_element.text
57
  # print(main_text)
 
25
 
26
  return token_length
27
 
 
28
  def get_main_text_from_url(url):
29
+ # print('get_main_text_from_url...')
30
  # Define the fake header
31
  headers = {
32
  'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.124 Safari/537.36'
 
47
  if main_text_element is None:
48
  main_text_element = soup.find('div', {'class': 'content-area'}) # tri-cities dispatch
49
  if main_text_element is None:
50
+ main_text_element = soup.find('article', {'class': 'article-content-story article-content-story--story'})
51
  if main_text_element is None:
52
  main_text_element = soup.find('body')
53
 
54
+
55
+
56
  if main_text_element:
57
  main_text = main_text_element.text
58
  # print(main_text)
processAgain.py CHANGED
@@ -70,7 +70,7 @@ def attach_articles(file):
70
 
71
  output_fname = 'output_2zzz.tsv'
72
 
73
- # Write the processed data to a new file
74
  with open(output_fname, 'w') as output_file:
75
  # Define the fieldnames for the output file
76
  fieldnames = list(reader.fieldnames) + ["Content"]
 
70
 
71
  output_fname = 'output_2zzz.tsv'
72
 
73
+ # Write the processed data to a new file
74
  with open(output_fname, 'w') as output_file:
75
  # Define the fieldnames for the output file
76
  fieldnames = list(reader.fieldnames) + ["Content"]