Spaces:

findConsole
/

PromptTesting

Sleeping

App Files Files Community

Marthee committed 9 days ago

Commit

adf7f0e

verified ·

1 Parent(s): b53a59b

Update app.py

Browse files

Files changed (1) hide show

app.py +16 -2

app.py CHANGED Viewed

@@ -1780,7 +1780,7 @@ def testFunction(pdf_path, model,LLM_prompt):
     highlighted=[]
     processed_subjects = set()  # Initialize at the top of testFunction
     toc_pages = get_toc_page_numbers(doc)
-    identified_headers=process_document_in_chunks(len(doc), pdf_path,LLM_prompt, model)
     # identified_headers = identify_headers_with_openrouterNEWW(doc, api_key='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')# ['text', fontsize, page number,y]
     # with open("identified_headers.txt", "w", encoding="utf-8") as f:
@@ -2232,6 +2232,19 @@ def testFunction(pdf_path, model,LLM_prompt):
             highlight_boxes(docHighlights, page_highlights,stringtowrite)
     print("Current working directory:", os.getcwd())
     if data_list_JSON and not data_list_JSON[-1]["BodyText"] and collected_lines:
         data_list_JSON[-1]["BodyText"] = collected_lines[1:] if len(collected_lines) > 0 else []
 # Final cleanup of the JSON data before returning
@@ -2244,7 +2257,8 @@ def testFunction(pdf_path, model,LLM_prompt):
             # If they match or the subject is inside the first line, remove it
             if subject in first_line or first_line in subject:
-                entry["BodyText"] = entry["BodyText"][1:]
     jsons.append(data_list_JSON)
     logger.info(f"Markups done! Uploading to dropbox")
     logger.info(f"Uploaded and Readyy!")

     highlighted=[]
     processed_subjects = set()  # Initialize at the top of testFunction
     toc_pages = get_toc_page_numbers(doc)
+    identified_headers=process_document_in_chunks(len(doc), pdf_path, LLM_prompt, model)
     # identified_headers = identify_headers_with_openrouterNEWW(doc, api_key='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')# ['text', fontsize, page number,y]
     # with open("identified_headers.txt", "w", encoding="utf-8") as f:
             highlight_boxes(docHighlights, page_highlights,stringtowrite)
     print("Current working directory:", os.getcwd())
+    docHighlights.save("highlighted_output.pdf")
+    # dbxTeam = tsadropboxretrieval.ADR_Access_DropboxTeam('user')
+    # metadata = dbxTeam.sharing_get_shared_link_metadata(pdf_path)
+    # dbPath = '/TSA JOBS/ADR Test/FIND/'
+    # pdf_bytes = BytesIO()
+    # docHighlights.save(pdf_bytes)
+    # pdflink = tsadropboxretrieval.uploadanyFile(doc=docHighlights, path=dbPath, pdfname=filename)
+    # json_output=changepdflinks(json_output,pdflink)
+    # return pdf_bytes.getvalue(), docHighlights , json_output , Alltexttobebilled , alltextWithoutNotbilled , filename
+    # Final safety check: if the very last entry in our list has an empty BodyText,
+    # but we have collected_lines, sync them.
     if data_list_JSON and not data_list_JSON[-1]["BodyText"] and collected_lines:
         data_list_JSON[-1]["BodyText"] = collected_lines[1:] if len(collected_lines) > 0 else []
 # Final cleanup of the JSON data before returning
             # If they match or the subject is inside the first line, remove it
             if subject in first_line or first_line in subject:
+                entry["BodyText"] = entry["BodyText"][1:]
     jsons.append(data_list_JSON)
     logger.info(f"Markups done! Uploading to dropbox")
     logger.info(f"Uploaded and Readyy!")