Spaces:

amra-ai
/

summary

Sleeping

Roland Ding committed on Sep 12, 2023

Commit

362c1df

1 Parent(s): a5f3cfb

8.8.21.57 updated cloud_textract.py to use default_s3_bucket in place of the hard-coded bucket name string. Also added default_region and a new app_data structure.

Files changed (2) hide show

application.py CHANGED Viewed

@@ -1,14 +1,17 @@
 import os
 '''
 shared environment variables
 '''
-default_device = "cervical-cage"
 aws_access_key_id = os.environ.get('AMRA_AWS_ACCESS_KEY_ID')
 aws_secret_access_key = os.environ.get('AMRA_AWS_SECRET_ACCESS_KEY')
 openai_api_key = os.environ.get('AMRA_OPENAI_API_KEY')
 device_options={
     "secondary extraction":False,
     "secondary extraction count":0
@@ -106,18 +109,27 @@ authors_inst=[
     f"return the results on the same line separated by commas.",
 ]
-accepted_date_inst=[
-    f"extract the acceptance date of the article from the system text.",
-    f"return the results on a single line as 'Accepted Date: <month>, <year>.",
 ]
 '''
 application default data
 '''
 app_data = {
-    "current_article":{},
     "articles":[],
     "terms":[],
-    "prompts":[],
-    # "outputs":[]
 }

 import os
+from collections import defaultdict
 '''
 shared environment variables
 '''
 aws_access_key_id = os.environ.get('AMRA_AWS_ACCESS_KEY_ID')
 aws_secret_access_key = os.environ.get('AMRA_AWS_SECRET_ACCESS_KEY')
 openai_api_key = os.environ.get('AMRA_OPENAI_API_KEY')
+default_region = "Spine"
 device_options={
     "secondary extraction":False,
     "secondary extraction count":0
     f"return the results on the same line separated by commas.",
 ]
+accepted_year_inst=[
+    f"extract the acceptance year of the article from the system text.",
+    f"return the results on a single line as 'Accepted Year: <year>.",
 ]
+accepted_month_inst=[
+    f"extract the acceptance month of the article from the system text.",
+    f"return the results on a single line as 'Accepted Month: <month>.",
+]
+abstract_inst=[
+    f"Extract the abstract of the article from the system text, and return its original text. Normally, the abstract is before the introduction and might a paragraph, or in sections of study design, objective, summary of background, methods, results, and conclusion with keywords section in the end.",
+    ]
 '''
 application default data
 '''
 app_data = {
+    "current article":{},
     "articles":[],
+    "prompts":{},
     "terms":[],
+    "paths":{}
 }

cloud_textract.py CHANGED Viewed

@@ -1,7 +1,7 @@
 import boto3
 from utility import terminal_print, create_md_table
-from application import aws_access_key_id, aws_secret_access_key
 textract = boto3.client(
     'textract',
@@ -190,7 +190,7 @@ def textract_output_to_table(table,blocks_dict):
             array.append([])
             cur_row = r_id
         if "Relationships" in c:
-            words = [blocks_dict[i]["Text"] for i in  c["Relationships"][0]["Ids"]]
         else:
             words =[""]
         # print(c["RowIndex"],c["ColumnIndex"]," ".join(words))
@@ -199,7 +199,7 @@ def textract_output_to_table(table,blocks_dict):
     return array
 @terminal_print
-def get_tables(filename:str,bucket:str="amra-studies"):
     '''
     This function is used to get the tables from the textract output

 import boto3
 from utility import terminal_print, create_md_table
+from application import aws_access_key_id, aws_secret_access_key, default_s3_bucket
 textract = boto3.client(
     'textract',
             array.append([])
             cur_row = r_id
         if "Relationships" in c:
+            words = [blocks_dict[i]["Text"] for i in  c["Relationships"][0]["Ids"] if blocks_dict[i]["BlockType"] == "WORD"]
         else:
             words =[""]
         # print(c["RowIndex"],c["ColumnIndex"]," ".join(words))
     return array
 @terminal_print
+def get_tables(filename:str,bucket:str=default_s3_bucket):
     '''
     This function is used to get the tables from the textract output