# Pygmales
# updated project state
# 268baab
# raw
# history blame
# 1.05 kB
# Signature of the generated ``run`` function. The string deliberately stops
# after the closing parenthesis: it carries no return annotation and no
# trailing colon.
FUNC_HEADER_TEMPL = (
    "def run("
    "file_name: str, "
    "file_content: str, "
    "chunk: str"
    ")"
)

# Maps a type key to the Python annotation text used for it.
# NOTE(review): keys look like property data types ("text[]" being a list of
# text values) - confirm against the code that consumes this table.
FUNC_RETURN_TYPE_TEMPL = {
    "text": "str",
    "date": "str",
    "text[]": "list[str]",
}
# One-line, triple-quoted docstring text with a ``{name}`` placeholder
# (presumably filled in with ``str.format`` by the consumer - confirm).
PREAMBLE_TEMPL_STD = '"""Property extraction strategy for property {name}."""'

# Docstring text for the generated function. Every line starts with a tab,
# and lines are joined with "\n" with no trailing newline.
COMMENT_TEMPL_STD = "\n".join(
    (
        '\t"""',
        "\tRuns the property extraction strategy on processed chunk.",
        "\tArgs:",
        "\t\tfile_name (str): Name of the file from which the chunk was collected.",
        "\t\tfile_content (str): Entire text extracted from file.",
        "\t\tchunk (str): Chunk collected from file.",
        "\tReturns:",
        "\t\tExtracted property.",
        '\t"""',
    )
)
# Default function body: return the chunk unchanged. Bodies are indented with
# a tab and contain no trailing newline.
BODY_TEMPL_STD = "\treturn chunk"

# Ready-made function bodies, looked up by key. Each body refers only to the
# parameter names ``file_name``, ``file_content`` and ``chunk`` and performs
# its own imports, so it is self-contained once placed under a function header
# that declares those parameters.
BODY_TEMPL = {
    # Same text as the default body.
    "body": BODY_TEMPL_STD,
    "source": "\treturn file_name",
    # MD5 is used here only to derive an identifier from the stripped text.
    "chunk_id": (
        "\timport hashlib\n"
        "\treturn hashlib.md5(chunk.strip().encode('utf-8')).hexdigest()"
    ),
    "document_id": (
        "\timport hashlib\n"
        "\treturn hashlib.md5(file_content.strip().encode('utf-8')).hexdigest()"
    ),
    # Ask for the current instant directly in UTC. Calling now() without a
    # timezone and then replace(tzinfo=UTC) would stamp the *local* wall-clock
    # time as UTC, which is wrong on any host not running in UTC.
    # NOTE(review): this returns a datetime object, not a string - confirm
    # that is what the consumer of the generated function expects.
    "date": (
        "\timport datetime\n"
        "\treturn datetime.datetime.now(datetime.timezone.utc)"
    ),
}