Upload folder using huggingface_hub
Browse files
- .idea/git_toolbox_prj.xml +15 -0
- .idea/workspace.xml +16 -6
- main.py +8 -2
.idea/git_toolbox_prj.xml
ADDED
|
@@ -0,0 +1,15 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
<?xml version="1.0" encoding="UTF-8"?>
|
| 2 |
+
<project version="4">
|
| 3 |
+
<component name="GitToolBoxProjectSettings">
|
| 4 |
+
<option name="commitMessageIssueKeyValidationOverride">
|
| 5 |
+
<BoolValueOverride>
|
| 6 |
+
<option name="enabled" value="true" />
|
| 7 |
+
</BoolValueOverride>
|
| 8 |
+
</option>
|
| 9 |
+
<option name="commitMessageValidationEnabledOverride">
|
| 10 |
+
<BoolValueOverride>
|
| 11 |
+
<option name="enabled" value="true" />
|
| 12 |
+
</BoolValueOverride>
|
| 13 |
+
</option>
|
| 14 |
+
</component>
|
| 15 |
+
</project>
|
.idea/workspace.xml
CHANGED
|
@@ -4,11 +4,7 @@
|
|
| 4 |
<option name="autoReloadType" value="SELECTIVE" />
|
| 5 |
</component>
|
| 6 |
<component name="ChangeListManager">
|
| 7 |
-
<list default="true" id="02cef8cb-a1a8-48aa-8d50-1e20129f7d23" name="Changes" comment="">
|
| 8 |
-
<change afterPath="$PROJECT_DIR$/requirements.txt" afterDir="false" />
|
| 9 |
-
<change beforePath="$PROJECT_DIR$/.idea/unimoConvertPdfToExcel.iml" beforeDir="false" afterPath="$PROJECT_DIR$/.idea/unimoConvertPdfToExcel.iml" afterDir="false" />
|
| 10 |
-
<change beforePath="$PROJECT_DIR$/main.py" beforeDir="false" afterPath="$PROJECT_DIR$/main.py" afterDir="false" />
|
| 11 |
-
</list>
|
| 12 |
<option name="SHOW_DIALOG" value="false" />
|
| 13 |
<option name="HIGHLIGHT_CONFLICTS" value="true" />
|
| 14 |
<option name="HIGHLIGHT_NON_ACTIVE_CHANGELIST" value="false" />
|
|
@@ -53,13 +49,27 @@
|
|
| 53 |
<option name="number" value="Default" />
|
| 54 |
<option name="presentableId" value="Default" />
|
| 55 |
<updated>1741724175531</updated>
|
| 56 |
-
<workItem from="1741724176678" duration="
|
| 57 |
</task>
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 58 |
<servers />
|
| 59 |
</component>
|
| 60 |
<component name="TypeScriptGeneratedFilesManager">
|
| 61 |
<option name="version" value="3" />
|
| 62 |
</component>
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 63 |
<component name="com.intellij.coverage.CoverageDataManagerImpl">
|
| 64 |
<SUITE FILE_PATH="coverage/unimoConvertPdfToExcel$main.coverage" NAME="main Coverage Results" MODIFIED="1741724531337" SOURCE_PROVIDER="com.intellij.coverage.DefaultCoverageFileProvider" RUNNER="coverage.py" COVERAGE_BY_TEST_ENABLED="false" COVERAGE_TRACING_ENABLED="false" WORKING_DIRECTORY="$PROJECT_DIR$" />
|
| 65 |
</component>
|
|
|
|
| 4 |
<option name="autoReloadType" value="SELECTIVE" />
|
| 5 |
</component>
|
| 6 |
<component name="ChangeListManager">
|
| 7 |
+
<list default="true" id="02cef8cb-a1a8-48aa-8d50-1e20129f7d23" name="Changes" comment="Update project configuration and dependencies Updated the Python SDK configuration in the `.iml` file to use Python 3.12. Enabled app sharing in `main.py` by setting `share=True` in the Gradio app launch. Added a `requirements.txt` file to document and manage project dependencies." />
|
|
|
|
|
|
|
|
|
|
|
|
|
| 8 |
<option name="SHOW_DIALOG" value="false" />
|
| 9 |
<option name="HIGHLIGHT_CONFLICTS" value="true" />
|
| 10 |
<option name="HIGHLIGHT_NON_ACTIVE_CHANGELIST" value="false" />
|
|
|
|
| 49 |
<option name="number" value="Default" />
|
| 50 |
<option name="presentableId" value="Default" />
|
| 51 |
<updated>1741724175531</updated>
|
| 52 |
+
<workItem from="1741724176678" duration="1970000" />
|
| 53 |
</task>
|
| 54 |
+
<task id="LOCAL-00001" summary="Update project configuration and dependencies Updated the Python SDK configuration in the `.iml` file to use Python 3.12. Enabled app sharing in `main.py` by setting `share=True` in the Gradio app launch. Added a `requirements.txt` file to document and manage project dependencies.">
|
| 55 |
+
<option name="closed" value="true" />
|
| 56 |
+
<created>1741726163321</created>
|
| 57 |
+
<option name="number" value="00001" />
|
| 58 |
+
<option name="presentableId" value="LOCAL-00001" />
|
| 59 |
+
<option name="project" value="LOCAL" />
|
| 60 |
+
<updated>1741726163321</updated>
|
| 61 |
+
</task>
|
| 62 |
+
<option name="localTasksCounter" value="2" />
|
| 63 |
<servers />
|
| 64 |
</component>
|
| 65 |
<component name="TypeScriptGeneratedFilesManager">
|
| 66 |
<option name="version" value="3" />
|
| 67 |
</component>
|
| 68 |
+
<component name="VcsManagerConfiguration">
|
| 69 |
+
<MESSAGE value="Update project settings, enable app sharing, and add dependencies Updated the Python SDK configuration in the project settings and enabled sharing in the application launch. Added a `requirements.txt` file with all necessary dependencies for the project. These changes improve environment consistency and facilitation of collaboration." />
|
| 70 |
+
<MESSAGE value="Update project configuration and dependencies Updated the Python SDK configuration in the `.iml` file to use Python 3.12. Enabled app sharing in `main.py` by setting `share=True` in the Gradio app launch. Added a `requirements.txt` file to document and manage project dependencies." />
|
| 71 |
+
<option name="LAST_COMMIT_MESSAGE" value="Update project configuration and dependencies Updated the Python SDK configuration in the `.iml` file to use Python 3.12. Enabled app sharing in `main.py` by setting `share=True` in the Gradio app launch. Added a `requirements.txt` file to document and manage project dependencies." />
|
| 72 |
+
</component>
|
| 73 |
<component name="com.intellij.coverage.CoverageDataManagerImpl">
|
| 74 |
<SUITE FILE_PATH="coverage/unimoConvertPdfToExcel$main.coverage" NAME="main Coverage Results" MODIFIED="1741724531337" SOURCE_PROVIDER="com.intellij.coverage.DefaultCoverageFileProvider" RUNNER="coverage.py" COVERAGE_BY_TEST_ENABLED="false" COVERAGE_TRACING_ENABLED="false" WORKING_DIRECTORY="$PROJECT_DIR$" />
|
| 75 |
</component>
|
main.py
CHANGED
|
@@ -67,11 +67,16 @@ def process_pdf(pdf_file):
|
|
| 67 |
# Ensure correct Brazilian formatting
|
| 68 |
df_final["VLR.COMISSAO"] = df_final["VLR.COMISSAO"].str.replace(",", "X", regex=False).str.replace(".", ",", regex=False).str.replace("X", ".", regex=False)
|
| 69 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 70 |
# Save the correctly formatted file
|
| 71 |
output_excel_path = "final_filtered_output_brazilian.xlsx"
|
| 72 |
df_final.to_excel(output_excel_path, index=False)
|
| 73 |
|
| 74 |
-
return output_excel_path
|
| 75 |
|
| 76 |
# Gradio UI
|
| 77 |
with gr.Blocks() as app:
|
|
@@ -82,9 +87,10 @@ with gr.Blocks() as app:
|
|
| 82 |
pdf_input = gr.File(label="Upload your PDF file")
|
| 83 |
process_button = gr.Button("Process PDF")
|
| 84 |
|
|
|
|
| 85 |
output_file = gr.File(label="Download Excel File")
|
| 86 |
|
| 87 |
-
process_button.click(fn=process_pdf, inputs=[pdf_input], outputs=[output_file])
|
| 88 |
|
| 89 |
# Launch the Gradio app
|
| 90 |
app.launch(share=True)
|
|
|
|
| 67 |
# Ensure correct Brazilian formatting
|
| 68 |
df_final["VLR.COMISSAO"] = df_final["VLR.COMISSAO"].str.replace(",", "X", regex=False).str.replace(".", ",", regex=False).str.replace("X", ".", regex=False)
|
| 69 |
|
| 70 |
+
# Update the NOME DO CLIENTE column with the required pattern
|
| 71 |
+
df_final["NOME DO CLIENTE"] = df_final.apply(
|
| 72 |
+
lambda row: f'CLIENTE: {row["NOME DO CLIENTE"]} COTA: {row["COTA"][:4]} GRUPO: {row["COTA"][5:]}', axis=1
|
| 73 |
+
)
|
| 74 |
+
|
| 75 |
# Save the correctly formatted file
|
| 76 |
output_excel_path = "final_filtered_output_brazilian.xlsx"
|
| 77 |
df_final.to_excel(output_excel_path, index=False)
|
| 78 |
|
| 79 |
+
return df_final, output_excel_path
|
| 80 |
|
| 81 |
# Gradio UI
|
| 82 |
with gr.Blocks() as app:
|
|
|
|
| 87 |
pdf_input = gr.File(label="Upload your PDF file")
|
| 88 |
process_button = gr.Button("Process PDF")
|
| 89 |
|
| 90 |
+
output_df = gr.Dataframe(label="Extracted Data Preview")
|
| 91 |
output_file = gr.File(label="Download Excel File")
|
| 92 |
|
| 93 |
+
process_button.click(fn=process_pdf, inputs=[pdf_input], outputs=[output_df, output_file])
|
| 94 |
|
| 95 |
# Launch the Gradio app
|
| 96 |
app.launch(share=True)
|