Spaces:
Build error
Build error
Quentin Gallouédec
committed on
Commit
·
ce89e6e
1
Parent(s):
e6fad20
private
Browse files
app.py
CHANGED
|
@@ -68,12 +68,13 @@ def clean(text):
|
|
| 68 |
return text
|
| 69 |
|
| 70 |
|
| 71 |
-
def pdf2dataset(pathes, user_id, dataset_id, token, progress=gr.Progress()):
|
| 72 |
if any([user_id, dataset_id, token]) and not all([user_id, dataset_id, token]):
|
| 73 |
raise gr.Error("Please provide all three: User ID, Dataset ID, and API token.")
|
| 74 |
|
| 75 |
if user_id == "":
|
| 76 |
user_id = "pdf2dataset"
|
|
|
|
| 77 |
if dataset_id == "":
|
| 78 |
dataset_id = f"{random.getrandbits(128):x}"
|
| 79 |
if token == "":
|
|
@@ -104,7 +105,7 @@ def pdf2dataset(pathes, user_id, dataset_id, token, progress=gr.Progress()):
|
|
| 104 |
# Upload the dataset to Hugging Face
|
| 105 |
progress(0, desc="Uploading to Hugging Face...")
|
| 106 |
dataset = Dataset.from_dict({"text": page_texts, "source": page_filenames})
|
| 107 |
-
dataset.push_to_hub(f"{user_id}/{dataset_id}", token=token)
|
| 108 |
progress(1, desc="Done!")
|
| 109 |
|
| 110 |
instructions = instructions_template.substitute(user_id=user_id, dataset_id=dataset_id)
|
|
@@ -164,7 +165,7 @@ with gr.Blocks() as demo:
|
|
| 164 |
gr.Markdown("## 1️⃣ Upload PDFs")
|
| 165 |
file = gr.File(file_types=["pdf"], file_count="multiple")
|
| 166 |
gr.Markdown(caution_text)
|
| 167 |
-
with gr.Accordion("🔒 Pushing to my personal Hugging Face
|
| 168 |
gr.Markdown(
|
| 169 |
"""Recommended for API token
|
| 170 |
- Go to https://huggingface.co/settings/tokens?new_token=true
|
|
@@ -176,6 +177,7 @@ with gr.Blocks() as demo:
|
|
| 176 |
user_id = gr.Textbox(label="User ID", placeholder="Enter your Hugging Face user ID")
|
| 177 |
dataset_id = gr.Textbox(label="Dataset ID", placeholder="Enter the desired dataset ID")
|
| 178 |
token = gr.Textbox(label="API token", placeholder="Enter a Hugging Face API token")
|
|
|
|
| 179 |
|
| 180 |
gr.Markdown("## 2️⃣ Convert the PDFs and upload")
|
| 181 |
convert_button = gr.Button("🔄 Convert and upload")
|
|
@@ -189,7 +191,9 @@ with gr.Blocks() as demo:
|
|
| 189 |
delete_button = gr.Button("🗑️ Delete dataset")
|
| 190 |
|
| 191 |
# Define the actions
|
| 192 |
-
convert_button.click(
|
|
|
|
|
|
|
| 193 |
delete_button.click(delete_dataset, inputs=[dataset_id_to_delete], outputs=[delete_button])
|
| 194 |
dataset_id_to_delete.input(lambda: "🗑️ Delete dataset", outputs=[delete_button])
|
| 195 |
|
|
|
|
| 68 |
return text
|
| 69 |
|
| 70 |
|
| 71 |
+
def pdf2dataset(pathes, user_id, dataset_id, token, private, progress=gr.Progress()):
|
| 72 |
if any([user_id, dataset_id, token]) and not all([user_id, dataset_id, token]):
|
| 73 |
raise gr.Error("Please provide all three: User ID, Dataset ID, and API token.")
|
| 74 |
|
| 75 |
if user_id == "":
|
| 76 |
user_id = "pdf2dataset"
|
| 77 |
+
private = False
|
| 78 |
if dataset_id == "":
|
| 79 |
dataset_id = f"{random.getrandbits(128):x}"
|
| 80 |
if token == "":
|
|
|
|
| 105 |
# Upload the dataset to Hugging Face
|
| 106 |
progress(0, desc="Uploading to Hugging Face...")
|
| 107 |
dataset = Dataset.from_dict({"text": page_texts, "source": page_filenames})
|
| 108 |
+
dataset.push_to_hub(f"{user_id}/{dataset_id}", token=token, private=private)
|
| 109 |
progress(1, desc="Done!")
|
| 110 |
|
| 111 |
instructions = instructions_template.substitute(user_id=user_id, dataset_id=dataset_id)
|
|
|
|
| 165 |
gr.Markdown("## 1️⃣ Upload PDFs")
|
| 166 |
file = gr.File(file_types=["pdf"], file_count="multiple")
|
| 167 |
gr.Markdown(caution_text)
|
| 168 |
+
with gr.Accordion("🔒 Pushing to my personal Hugging Face namespace", open=False):
|
| 169 |
gr.Markdown(
|
| 170 |
"""Recommended for API token
|
| 171 |
- Go to https://huggingface.co/settings/tokens?new_token=true
|
|
|
|
| 177 |
user_id = gr.Textbox(label="User ID", placeholder="Enter your Hugging Face user ID")
|
| 178 |
dataset_id = gr.Textbox(label="Dataset ID", placeholder="Enter the desired dataset ID")
|
| 179 |
token = gr.Textbox(label="API token", placeholder="Enter a Hugging Face API token")
|
| 180 |
+
private = gr.Checkbox(label="Private", default=False)
|
| 181 |
|
| 182 |
gr.Markdown("## 2️⃣ Convert the PDFs and upload")
|
| 183 |
convert_button = gr.Button("🔄 Convert and upload")
|
|
|
|
| 191 |
delete_button = gr.Button("🗑️ Delete dataset")
|
| 192 |
|
| 193 |
# Define the actions
|
| 194 |
+
convert_button.click(
|
| 195 |
+
pdf2dataset, inputs=[file, user_id, dataset_id, token, private], outputs=[instructions, preview, dataset_id_to_delete]
|
| 196 |
+
)
|
| 197 |
delete_button.click(delete_dataset, inputs=[dataset_id_to_delete], outputs=[delete_button])
|
| 198 |
dataset_id_to_delete.input(lambda: "🗑️ Delete dataset", outputs=[delete_button])
|
| 199 |
|