File size: 1,046 Bytes
268baab
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
# Signature line for a generated ``run`` function. It carries no return
# annotation and no trailing colon.
# NOTE(review): presumably the caller appends "-> <type>:" using
# FUNC_RETURN_TYPE_TEMPL -- confirm against the code that assembles the source.
FUNC_HEADER_TEMPL = (
    "def run("
    "file_name: str, "
    "file_content: str, "
    "chunk: str"
    ")"
)

# Maps a type label to the text of a Python type annotation.
# Both scalar labels ("text", "date") map to "str"; only the array label
# "text[]" maps to a list annotation.
# NOTE(review): the labels look like property data-type names -- confirm
# against the caller that looks them up.
FUNC_RETURN_TYPE_TEMPL = {
    **dict.fromkeys(("text", "date"), "str"),
    "text[]": "list[str]",
}

# One-line docstring (triple double quotes included) with a ``{name}``
# placeholder.
# NOTE(review): presumably filled in via str.format(name=...) and placed at
# the top of the generated source -- confirm against the caller.
PREAMBLE_TEMPL_STD = '"""Property extraction strategy for property {name}."""'

# Tab-indented docstring text describing the ``run`` function's arguments
# (file_name, file_content, chunk) and its return value. Lines are joined
# with "\n"; there is no trailing newline.
COMMENT_TEMPL_STD = "\n".join(
    [
        '\t"""',
        "\tRuns the property extraction strategy on processed chunk.",
        "",
        "\tArgs:",
        "\t\tfile_name (str): Name of the file from which the chunk was collected.",
        "\t\tfile_content (str): Entire text extracted from file.",
        "\t\tchunk (str): Chunk collected from file.",
        # NOTE: this separator line is four spaces (not empty, not a tab);
        # kept as-is so the emitted text does not change.
        "    ",
        "\tReturns:",
        "\t\tExtracted property.",
        '\t"""',
    ]
)

# Default function body: return the chunk unchanged.
BODY_TEMPL_STD = "\treturn chunk"

# Tab-indented function bodies, keyed by name. Each body only uses the
# parameters named in FUNC_HEADER_TEMPL (file_name, file_content, chunk) and
# does its own import, so the text is self-contained.
# NOTE(review): the keys look like well-known property names -- confirm
# against the caller that selects a body.
BODY_TEMPL = {
    # Same text as the default body; reuse the constant so they cannot drift.
    "body": BODY_TEMPL_STD,
    "source": "\treturn file_name",
    # Hex MD5 digest of the whitespace-stripped chunk text.
    "chunk_id": "\timport hashlib\n\treturn hashlib.md5(chunk.strip().encode('utf-8')).hexdigest()",
    # Hex MD5 digest of the whitespace-stripped full file text.
    "document_id": "\timport hashlib\n\treturn hashlib.md5(file_content.strip().encode('utf-8')).hexdigest()",
    # Current time as a timezone-aware UTC datetime.
    # Fix: the previous form, datetime.now().replace(tzinfo=utc), took the
    # naive *local* wall-clock time and merely relabelled it as UTC, so the
    # value was wrong by the host's UTC offset. now(timezone.utc) yields the
    # true UTC instant.
    # NOTE(review): this returns a datetime object, while
    # FUNC_RETURN_TYPE_TEMPL annotates "date" as "str" -- confirm whether the
    # caller serialises the value.
    "date": "\timport datetime\n\treturn datetime.datetime.now(datetime.timezone.utc)",
}