Spaces:
Sleeping
Sleeping
Alexander Watson
committed on
Commit
·
fb73a92
1
Parent(s):
097faaf
fix multi-line bug, add SDK code to download zip
Browse files
app.py
CHANGED
|
@@ -11,8 +11,7 @@ import requests
|
|
| 11 |
import streamlit as st
|
| 12 |
from datasets import load_dataset
|
| 13 |
from gretel_client import Gretel
|
| 14 |
-
from navigator_helpers import
|
| 15 |
-
StreamlitLogHandler)
|
| 16 |
|
| 17 |
# Create a StringIO buffer to capture the logging output
|
| 18 |
log_buffer = StringIO()
|
|
@@ -363,7 +362,9 @@ def main():
|
|
| 363 |
|
| 364 |
with st.expander("Download SDK Code", expanded=False):
|
| 365 |
st.markdown("### Ready to generate data at scale?")
|
| 366 |
-
st.write(
|
|
|
|
|
|
|
| 367 |
|
| 368 |
config_text = f"""
|
| 369 |
#!pip install -Uqq git+https://github.com/gretelai/navigator-helpers.git
|
|
@@ -397,8 +398,8 @@ def main():
|
|
| 397 |
navigator_tabular="{navigator_tabular}",
|
| 398 |
navigator_llm="{navigator_llm}",
|
| 399 |
co_teach_llms={co_teach_llms},
|
| 400 |
-
instruction_format_prompt=
|
| 401 |
-
response_format_prompt=
|
| 402 |
)
|
| 403 |
|
| 404 |
# Create the data augmenter and perform augmentation
|
|
@@ -563,17 +564,23 @@ def main():
|
|
| 563 |
with open(augmented_data_file_path, "w") as augmented_data_file:
|
| 564 |
augmented_data_file.write(augmented_data_jsonl)
|
| 565 |
|
| 566 |
-
#
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 567 |
zip_file_path = os.path.join(temp_dir, "augmentation_results.zip")
|
| 568 |
with zipfile.ZipFile(zip_file_path, "w") as zip_file:
|
| 569 |
zip_file.write(log_file_path, "complete_logs.jsonl")
|
| 570 |
if augmented_data_jsonl:
|
| 571 |
zip_file.write(augmented_data_file_path, "augmented_data.jsonl")
|
|
|
|
| 572 |
|
| 573 |
# Download the ZIP file
|
| 574 |
with open(zip_file_path, "rb") as zip_file:
|
| 575 |
st.download_button(
|
| 576 |
-
label="💾 Download Synthetic Data and
|
| 577 |
data=zip_file.read(),
|
| 578 |
file_name="gretel_synthetic_data.zip",
|
| 579 |
mime="application/zip",
|
|
|
|
| 11 |
import streamlit as st
|
| 12 |
from datasets import load_dataset
|
| 13 |
from gretel_client import Gretel
|
| 14 |
+
from navigator_helpers import DataAugmentationConfig, DataAugmenter, StreamlitLogHandler
|
|
|
|
| 15 |
|
| 16 |
# Create a StringIO buffer to capture the logging output
|
| 17 |
log_buffer = StringIO()
|
|
|
|
| 362 |
|
| 363 |
with st.expander("Download SDK Code", expanded=False):
|
| 364 |
st.markdown("### Ready to generate data at scale?")
|
| 365 |
+
st.write(
|
| 366 |
+
"Get started with your current configuration using the SDK code below:"
|
| 367 |
+
)
|
| 368 |
|
| 369 |
config_text = f"""
|
| 370 |
#!pip install -Uqq git+https://github.com/gretelai/navigator-helpers.git
|
|
|
|
| 398 |
navigator_tabular="{navigator_tabular}",
|
| 399 |
navigator_llm="{navigator_llm}",
|
| 400 |
co_teach_llms={co_teach_llms},
|
| 401 |
+
instruction_format_prompt='''{instruction_format_prompt}''',
|
| 402 |
+
response_format_prompt='''{response_format_prompt}'''
|
| 403 |
)
|
| 404 |
|
| 405 |
# Create the data augmenter and perform augmentation
|
|
|
|
| 564 |
with open(augmented_data_file_path, "w") as augmented_data_file:
|
| 565 |
augmented_data_file.write(augmented_data_jsonl)
|
| 566 |
|
| 567 |
+
# Write the SDK code to a file
|
| 568 |
+
sdk_file_path = os.path.join(temp_dir, "data_augmentation_code.py")
|
| 569 |
+
with open(sdk_file_path, "w") as sdk_file:
|
| 570 |
+
sdk_file.write(config_text)
|
| 571 |
+
|
| 572 |
+
# Create a ZIP file containing the logs, augmented data, and SDK code
|
| 573 |
zip_file_path = os.path.join(temp_dir, "augmentation_results.zip")
|
| 574 |
with zipfile.ZipFile(zip_file_path, "w") as zip_file:
|
| 575 |
zip_file.write(log_file_path, "complete_logs.jsonl")
|
| 576 |
if augmented_data_jsonl:
|
| 577 |
zip_file.write(augmented_data_file_path, "augmented_data.jsonl")
|
| 578 |
+
zip_file.write(sdk_file_path, "data_augmentation_code.py")
|
| 579 |
|
| 580 |
# Download the ZIP file
|
| 581 |
with open(zip_file_path, "rb") as zip_file:
|
| 582 |
st.download_button(
|
| 583 |
+
label="💾 Download Synthetic Data, Logs, and SDK Code",
|
| 584 |
data=zip_file.read(),
|
| 585 |
file_name="gretel_synthetic_data.zip",
|
| 586 |
mime="application/zip",
|