# data-translation-experiments / script_for_automation.py
# Provenance: Hugging Face Space file (author: rosemariafontana,
# commit 4c1eaa8, "Update script_for_automation.py")
import requests
from jsondiff import diff
import yaml
import pandas as pd
import os
import shutil
import json
from datetime import datetime
# The purpose of this script is to automate running a bunch of tests
# This script will take an input folder
# The input folder should contain:
# 1. A file containing a list of the recipe parameters
# 2. A file containing the input data for each of the schemas
# 3. ....
# Steps to do this that we will outline then perform
# First, get the gold standard JSONs from baserow
# Next, get the recipe parameter list from the input folder
# Iterate through the recipe parameter list one at a time
# In the iteration, first fill out a surveystack submission - is this possible with the current surveystack API?
# Next, save the surveystack submission ID (?)
# Use the iteration parameters to then get the three JSONs back from chatgpt
# Compare the JSONs to the gold standard JSONs
# Print out the differences in a .csv
# Print out a side by side of the yaml
# store all these together
# continue through iterations
# create downloadables of the results
BASEROW_API_KEY = os.getenv("BASEROW_API_KEY")
from process_data import process_specifications
def get_baserow_url(table_id):
    """Build the Baserow list-rows endpoint for *table_id*.

    ``user_field_names=true`` makes the API return human-readable column
    names instead of ``field_123`` identifiers.
    """
    print("GETTING BASEROW URL")
    api_base = "https://baserow.f11804a1.federatedcomputer.net/api"
    return api_base + "/database/rows/table/" + str(table_id) + "/?user_field_names=true"
def get_baserow_data():
    """Fetch gold-standard JSONs and raw input data from Baserow table 560.

    Returns:
        (gold_standards, input_data) — two dicts keyed by dataset name
        ('liz_carrot', 'ben_soybean', 'wally_squash').  Each
        ``gold_standards`` entry holds the 'planting' / 'interactions' /
        'trials' gold-standard JSON strings; each ``input_data`` entry holds
        'raw_interview' / 'otter_summary' / 'greg_summary' text.
        On a request failure both dicts are returned empty (the original
        returned a bare None, which crashed the caller's tuple unpacking).
    """
    print("GETTING BASEROW DATA")
    TABLE_ID = "560"
    BASEROW_URL = get_baserow_url(TABLE_ID)
    headers = {
        "Authorization": f"Token {os.environ['BASEROW_API_KEY']}",
        "Content-Type": "application/json"
    }
    # Which Baserow row id corresponds to which test dataset.  Building the
    # result dicts through this map also avoids the original's
    # UnboundLocalError when one of the expected rows was absent.
    row_id_to_dataset = {2: "liz_carrot", 3: "ben_soybean", 5: "wally_squash"}
    gold_standards = {}
    input_data = {}
    print("STARTING TO TRY RESPONSE REQUEST")
    try:
        response = requests.get(BASEROW_URL, headers=headers)
        print("GOT")
        response.raise_for_status()
        results = response.json().get("results", [])
        print("PARSING ROWS NOW")
        for row in results:
            print(f"Row ID: {row.get('id')}, Data: {row}")
            dataset = row_id_to_dataset.get(row.get("id"))
            if dataset is None:
                continue  # not one of the three tracked datasets
            gold_standards[dataset] = {
                "planting": row.get("Plantings and Fields - Gold Standard"),
                "interactions": row.get("Interactions - Gold Standard"),
                "trials": row.get("Trials - Gold Standard"),
            }
            input_data[dataset] = {
                "raw_interview": row.get("Raw Interview"),
                "otter_summary": row.get("Otter Summary"),
                "greg_summary": row.get("Post-Interview Summary"),
            }
        # How to retrieve this data:
        # liz_carrot_planting = gold_standards["liz_carrot"]["planting"]
        print("BASEROW DATA DONE GOT")
        print("GOLD STANDARDS HERE")
        print(gold_standards)
        print("INPUT DATA HERE")
        print(input_data)
        return gold_standards, input_data
    except requests.exceptions.RequestException as e:
        print(f"Failed to fetch rows: {e}")
        # Explicit (possibly empty) tuple so callers can still unpack/iterate.
        return gold_standards, input_data
def _select_value(field):
    """Value of a Baserow single-select field; None-safe (API returns null when unset)."""
    return (field or {}).get("value")


def _multi_select_value(field):
    """First value of a Baserow multi-select field; safe for None and empty lists."""
    items = field or []
    return items[0].get("value") if items else None


def _recipe_from_row(row):
    """Map one Baserow row dict to the flat recipe dict used by the pipeline."""
    return {
        "recipe_id": row.get("Recipe ID"),
        "testing_strategy_text": row.get("Testing Strategy for Set"),
        "schema_processing_model": _select_value(row.get("Schema Processing Model")),
        "pre_processing_strategy": _multi_select_value(row.get("Pre-Processing Strategy")),
        "pre_processing_text": row.get("Pre-Prompt Text"),
        "pre_processing_model": _select_value(row.get("Preprocessing Model")),
        "prompting_strategy": _multi_select_value(row.get("Prompting Strategy")),
        "plantings_and_fields_prompt": row.get("Plantings and Fields Prompting Text"),
        "interactions_prompt": row.get("Interactions Prompting Text"),
        "treatments_prompt": row.get("Treatments Prompting Text"),
    }


def get_recipes():
    """Fetch the recipe parameter list from Baserow table 589.

    Returns:
        list[dict]: one flat recipe dict per row (see _recipe_from_row).
        An empty list on request failure (the original implicitly returned
        None, which crashed the caller's iteration).

    Note: the original `.get(key, {}).get("value")` pattern crashed with
    AttributeError/TypeError when Baserow returned an explicit null (the
    default only applies when the key is absent); the helpers above are
    None-safe.
    """
    print("GETTING RECIPES FROM BASEROW NOW")
    # Earlier experiments used tables 588 / 578; table 580 holds a single
    # row for testing purposes.
    TABLE_ID = "589"
    BASEROW_URL = get_baserow_url(TABLE_ID)
    headers = {
        "Authorization": f"Token {os.environ['BASEROW_API_KEY']}",
        "Content-Type": "application/json"
    }
    print("TRYING TO GET A RESPONSE")
    try:
        response = requests.get(BASEROW_URL, headers=headers)
        response.raise_for_status()
        results = response.json().get("results", [])
        my_recipes = []
        print("PARSING ROWS")
        for row in results:
            print(f"Row ID: {row.get('id')}, Data: {row}")
            my_recipes.append(_recipe_from_row(row))
        print("FINISHED GETTING THE RECIPE DATA")
        print("RECIPES HERE")
        print(my_recipes)
        return my_recipes
    except requests.exceptions.RequestException as e:
        print(f"Failed to fetch rows: {e}")
        return []
def fill_out_survey(recipe_dict, input_data):
    """Submit one recipe/input combination to SurveyStack.

    Args:
        recipe_dict: flat recipe dict from get_recipes().
        input_data: the input text for the submission.

    Returns:
        The submission payload dict on success, None on failure.

    Fixes vs. the original:
      * removed the dead/nonsense assignments (``pre_process = recipe_dict``
        and two unused variables);
      * ``preprocessmodelversion`` now sends the recipe's pre-processing
        model, not its prompting strategy (consistent with get_data_ready);
      * any 2xx response counts as success — the old ``status_code == 200``
        check treated a 201 Created as a failure even though
        raise_for_status had already passed.
    """
    print("filling out survey")
    survey_id = "673b4994aef86f0533b3546c"
    base_url = "https://app.surveystack.io/api/submissions"
    # Pre-processing is requested iff the recipe carries pre-processing text.
    pre_processing = recipe_dict.get("pre_processing_text") is not None
    if pre_processing:
        preprocess_params = {
            "preprocessdata": ["yes"],
            "preprocessmodelversion": recipe_dict["pre_processing_model"],
            "prepromptstyle": recipe_dict["pre_processing_strategy"],
            "preprocessingprompt1": recipe_dict["pre_processing_text"],
            "preprocessingprompt2": "",
            "preprocessingprompt3": "",
        }
    else:
        preprocess_params = {
            "preprocessdata": ["no"],
            "preprocessmodelversion": None,
            "prepromptstyle": None,
            "preprocessingprompt1": None,
            "preprocessingprompt2": None,
            "preprocessingprompt3": None,
        }
    # Single payload build; only the pre-processing parameters differ.
    submission_data = {
        "survey": survey_id,
        "data": {
            "inputstyle": "big-block-input-text",
            "onelonginputtext": input_data,
            "schema_prompt": {
                "firstschemaprompt": recipe_dict["plantings_and_fields_prompt"],
                "secondschemaprompt": recipe_dict["interactions_prompt"],
                "thirdschemaprompt": recipe_dict["treatments_prompt"],
            },
        },
        "parameters": {
            "modelversion": recipe_dict["schema_processing_model"],
            "promptstyle": recipe_dict["prompting_strategy"],
            "multiplepreprompts": "no",
            **preprocess_params,
        },
    }
    headers = {
        "Content-Type": "application/json",
    }
    print("GETTING SURVEY RESPONSE")
    try:
        response = requests.post(base_url, headers=headers, data=json.dumps(submission_data))
        response.raise_for_status()  # raises for any 4xx/5xx
        print("Submission successful to SurveyStack!")
        print(response.json())
        return submission_data
    except requests.exceptions.RequestException as e:
        print(f"An error occurred while submitting the data: {e}")
        return None
def get_data_ready(recipe_dict, input_data_piece):
    """Assemble the processing spec consumed by process_specifications().

    Combines one recipe's prompts and parameters with a single input text
    into the nested dict layout the downstream pipeline expects.

    Args:
        recipe_dict: flat recipe dict (keys as produced by get_recipes()).
        input_data_piece: the chosen input text for this run.

    Returns:
        dict with 'inputstyle', 'input_text', 'prompts', and 'parameters'.
    """
    print("GETTING DATA READY")
    # Pre-processing is skipped only when the recipe explicitly opts out of
    # both a strategy and a model.
    no_preprocess = (
        recipe_dict["pre_processing_strategy"] == "None"
        and recipe_dict["pre_processing_model"] == "No preprocessing"
    )
    spec = {
        "inputstyle": "big-block-input-text",
        "input_text": input_data_piece,
        "prompts": {
            "firstschemaprompt": recipe_dict["plantings_and_fields_prompt"],
            "secondschemaprompt": recipe_dict["interactions_prompt"],
            "thirdschemaprompt": recipe_dict["treatments_prompt"],
        },
        "parameters": {
            "modelversion": recipe_dict["schema_processing_model"],
            "promptstyle": recipe_dict["prompting_strategy"],
            "preprocessdata": "no" if no_preprocess else "yes",
            "preprocessmodelversion": recipe_dict["pre_processing_model"],
            "multiplepreprompts": "no",
            "prepromptstyle": recipe_dict["pre_processing_strategy"],
            "preprocessingprompt1": recipe_dict["pre_processing_text"],
            "preprocessingprompt2": "",
            "preprocessingprompt3": "",
        },
    }
    print("DID THAT NOW")
    return spec
def format_json(json_data, truncate_length=500):
    """Pretty-print a JSON string for display, truncating long output.

    If *json_data* is not valid JSON, the raw text is returned instead
    (also truncated to *truncate_length* characters plus an ellipsis).
    """
    try:
        pretty = json.dumps(json.loads(json_data), indent=2)
    except json.JSONDecodeError:
        # Not valid JSON -- fall back to the raw text.
        pretty = json_data
    if len(pretty) > truncate_length:
        return pretty[:truncate_length] + "..."
    return pretty
# Custom method to handle all objects
def custom_serializer(obj):
    """``json.dumps(default=...)`` hook for objects the encoder can't handle.

    Enum members serialize as their name; objects exposing ``to_dict()``
    (e.g. the project's Soil / Yield types) use it; anything else falls back
    to its ``__dict__``.

    Fix vs. the original: ``Enum``, ``Soil`` and ``Yield`` were never
    imported in this module, so every call raised NameError.  Enum is now
    imported locally, and the project types are duck-typed via ``to_dict``.
    """
    from enum import Enum  # local import: not available at module top

    if isinstance(obj, Enum):
        return obj.name
    to_dict = getattr(obj, "to_dict", None)
    if callable(to_dict):
        return to_dict()
    return obj.__dict__  # default: plain attribute dict of custom objects
def sanitize_json_for_yaml(data):
    """Recursively normalize a JSON-like structure for yaml.dump.

    Tuples become lists (PyYAML would otherwise tag them
    ``!!python/tuple``); dicts and lists are rebuilt with sanitized
    contents; scalars pass through unchanged.

    Fix vs. the original: the tuple branch called ``list(data)`` without
    recursing, so tuples nested inside tuples escaped sanitization.
    """
    if isinstance(data, dict):
        return {key: sanitize_json_for_yaml(value) for key, value in data.items()}
    if isinstance(data, (list, tuple)):
        return [sanitize_json_for_yaml(item) for item in data]
    return data  # keep other types as-is
def generate_markdown_output(df):
    """Render one results DataFrame into a single markdown report string.

    Sections, in order: input transcripts, recipe-field table, per-schema
    differences, prompt table, and side-by-side JSON and YAML comparisons of
    the gold standard vs the machine-generated output.

    NOTE(review): assumes each row's 'Input_Transcript' is a string (it is
    sliced below) and 'Differences' is a dict mapping schema name -> list —
    confirm against the caller that builds the DataFrame.
    """
    # Start the markdown output string
    markdown = ""
    # 1. Input Transcript
    markdown += "\n## Input Transcript\n"
    for _, row in df.iterrows():
        # Truncate long transcripts so the report stays readable.
        truncated_input = row['Input_Transcript'][:500] + "..." if len(row['Input_Transcript']) > 500 else row['Input_Transcript']
        markdown += f"**Recipe ID {row['Recipe_ID']}**:\n```\n{truncated_input}\n```\n\n"
    # 2. Recipe Fields
    markdown += "\n## Recipe Fields\n"
    recipe_columns = [
        "Recipe ID", "Testing Strategy", "Schema Processing Model", "Pre-Processing Strategy",
        "Pre-Processing Text", "Pre-Processing Model", "Prompting Strategy"
    ]
    # Markdown table: header row, then a dashed separator row.
    recipe_table = "| " + " | ".join(recipe_columns) + " |\n"
    recipe_table += "| " + " | ".join(["-" * len(col) for col in recipe_columns]) + " |\n"
    for _, row in df.iterrows():
        recipe_table += f"| {row['Recipe_ID']} | {row['Testing_Strategy_Text']} | {row['Schema_Processing_Model']} | {row['Pre_Processing_Strategy']} | {row['Pre_Processing_Text']} | {row['Pre_Processing_Model']} | {row['Prompting_Strategy']} |\n"
    markdown += recipe_table + "\n"
    # 3. Differences
    markdown += "\n## Differences\n"
    for _, row in df.iterrows():
        markdown += f"\n### Recipe ID: {row['Recipe_ID']}\n"
        differences = row['Differences']
        # Loop through the differences list
        # (one sub-section per schema key, one bullet per diff entry)
        for key, value in differences.items():
            markdown += f"#### {key.capitalize()}\n"
            for item in value:
                markdown += f" - {item}\n"
    # 4. Prompts
    markdown += "\n## Prompts\n"
    prompt_columns = ["Plantings and Fields Prompt", "Interactions Prompt", "Treatments Prompt"]
    prompt_table = "| " + " | ".join(prompt_columns) + " |\n"
    prompt_table += "| " + " | ".join(["-" * len(col) for col in prompt_columns]) + " |\n"
    for _, row in df.iterrows():
        prompt_table += f"| {row['Plantings_and_Fields_Prompt']} | {row['Interactions_Prompt']} | {row['Treatments_Prompt']} |\n"
    markdown += prompt_table + "\n"
    # 5. Side-by-Side JSON Comparisons
    markdown += "\n## Gold Standard vs Machine Generated JSON\n"
    for _, row in df.iterrows():
        markdown += f"\n### Recipe ID: {row['Recipe_ID']}\n"
        for key in ["planting", "interactions", "trials"]:
            # custom_serializer handles non-JSON-native objects in the
            # machine-generated side.
            gold = json.dumps(row['Gold_Standard_JSON'].get(key, {}), indent=2)
            machine = json.dumps(row['Machine_Generated_JSON'].get(key, {}), default=custom_serializer, indent=2)
            markdown += f"#### {key.capitalize()}\n"
            markdown += f"**Gold Standard JSON**:\n```json\n{gold}\n```\n"
            markdown += f"**Machine Generated JSON**:\n```json\n{machine}\n```\n"
    # 6. Side-by-Side YAML Comparisons
    markdown += "\n## Gold Standard vs Machine Generated YAML\n"
    for _, row in df.iterrows():
        markdown += f"\n### Recipe ID: {row['Recipe_ID']}\n"
        for key in ["planting", "interactions", "trials"]:
            # sort_keys=True keeps the two YAML dumps comparable line-by-line.
            gold = yaml.dump(row['Gold_Standard_JSON'].get(key, {}), default_flow_style=False, sort_keys=True)
            machine = yaml.dump(row['Machine_Generated_JSON'].get(key, {}), default_flow_style=False, sort_keys=True)
            markdown += f"#### {key.capitalize()}\n"
            markdown += f"**Gold Standard YAML**:\n```yaml\n{gold}\n```\n"
            markdown += f"**Machine Generated YAML**:\n```yaml\n{machine}\n```\n"
    return markdown
def drive_process():
    """Run every recipe against every dataset and package the results.

    For each (recipe, dataset) pair: pick the input variant the recipe's
    pre-processing strategy calls for, build a processing spec, run
    process_specifications, diff the three generated JSONs against the
    Baserow gold standards, and write a markdown report plus
    gold/generated/differences JSON files into a per-recipe folder.
    Everything is zipped at the end.

    Returns:
        str: path to the .zip archive containing all result folders.

    Fixes vs. the original: the differences file used to contain
    ``str(differences_file)`` (its own path) instead of the diff data; the
    unreachable ``return output_folder`` after ``return zip_filename`` is
    gone; ``Input_Transcript`` now stores the selected transcript string
    (the report slices it as text) instead of the whole input dict.
    """
    print("We are starting to DRIVE PROCESS")
    # Gold-standard JSONs and raw input text, both keyed by dataset name.
    gold_standards, input_data = get_baserow_data()
    my_recipes = get_recipes()
    print("Making the OUTPUT STUFF")
    output_folder = "output_results_" + datetime.now().strftime("%Y%m%d_%H%M%S")
    os.makedirs(output_folder, exist_ok=True)
    print("GOING THROUGH RECIPES NOW")
    for recipe_dict in my_recipes:
        for key, input_chunks in input_data.items():
            output_rows = []
            print("RECIPE INFO")
            print(key)
            print(recipe_dict["recipe_id"])
            # Choose the input variant matching the recipe's pre-processing strategy.
            if recipe_dict["pre_processing_strategy"] == "Otter.ai Summary":
                input_data_piece = input_chunks["otter_summary"]
            elif recipe_dict["pre_processing_strategy"] == "Greg Summary":
                input_data_piece = input_chunks["greg_summary"]
            else:
                input_data_piece = input_chunks["raw_interview"]
            print("DECIDED INPUT DATA")
            print(input_data_piece)
            # SurveyStack submission disabled -- the API rejects this payload.
            # fill_out_survey(recipe_dict, input_data)
            proc_spec = get_data_ready(recipe_dict, input_data_piece)
            print("Gold Standard")
            # Gold standards for this dataset (key = liz_carrot / ben_soybean / wally_squash).
            gold_for_dataset = gold_standards[key]
            gold_standard_planting_json = json.loads(gold_for_dataset["planting"])
            gold_standard_interactions_json = json.loads(gold_for_dataset["interactions"])
            gold_standard_trials_json = json.loads(gold_for_dataset["trials"])
            print("Gold standard json after loading")
            print(gold_standard_planting_json)
            print("PROCESSING SPECIFICATIONS!!!!!!!!!!!!!!!")
            processed_farm_activity_json, processed_interactions_json, processed_trials_json = process_specifications(proc_spec)
            # For a dry run without the model, substitute the gold-standard
            # JSON strings for the processed outputs here (testing only).
            processed_farm_activity_json = json.loads(processed_farm_activity_json)
            processed_interactions_json = json.loads(processed_interactions_json)
            processed_trials_json = json.loads(processed_trials_json)
            print("Processed and loaded 1st json from machine gen")
            print(processed_farm_activity_json)
            # Compare the generated JSON to the gold standard; keeping only
            # the top-level keys of the jsondiff result for the report.
            differences_planting = list(diff(gold_standard_planting_json, processed_farm_activity_json))
            differences_interactions = list(diff(gold_standard_interactions_json, processed_interactions_json))
            differences_trials = list(diff(gold_standard_trials_json, processed_trials_json))
            print("Diff planting")
            print(differences_planting)
            json_diff = {
                "planting": differences_planting,
                "interactions": differences_interactions,
                "trials": differences_trials,
            }
            # Strip tuples etc. so yaml.dump emits plain lists/dicts.
            gold_standard_json = {
                "planting": sanitize_json_for_yaml(gold_standard_planting_json),
                "interactions": sanitize_json_for_yaml(gold_standard_interactions_json),
                "trials": sanitize_json_for_yaml(gold_standard_trials_json),
            }
            comparison_json = {
                "planting": sanitize_json_for_yaml(processed_farm_activity_json),
                "interactions": sanitize_json_for_yaml(processed_interactions_json),
                "trials": sanitize_json_for_yaml(processed_trials_json),
            }
            output_rows.append({
                "Recipe_ID": recipe_dict.get("recipe_id", "N/A"),
                "Testing_Strategy_Text": recipe_dict.get("testing_strategy_text", "N/A"),
                "Schema_Processing_Model": recipe_dict.get("schema_processing_model", "N/A"),
                "Pre_Processing_Strategy": recipe_dict.get("pre_processing_strategy", "N/A"),
                "Pre_Processing_Text": recipe_dict.get("pre_processing_text", "N/A"),
                "Pre_Processing_Model": recipe_dict.get("pre_processing_model", "N/A"),
                "Prompting_Strategy": recipe_dict.get("prompting_strategy", "N/A"),
                "Plantings_and_Fields_Prompt": recipe_dict.get("plantings_and_fields_prompt", "N/A"),
                "Interactions_Prompt": recipe_dict.get("interactions_prompt", "N/A"),
                "Treatments_Prompt": recipe_dict.get("treatments_prompt", "N/A"),
                # The selected transcript text (the report slices this as a string).
                "Input_Transcript": input_data_piece,
                "Gold_Standard_JSON": gold_standard_json,
                "Machine_Generated_JSON": comparison_json,
                "Differences": json_diff,
            })
            df = pd.DataFrame(output_rows)
            print("dataframe done now onto markdown")
            markdown_output = generate_markdown_output(df)
            recipe_folder = os.path.join(output_folder, f"recipe_{recipe_dict['recipe_id']}")
            os.makedirs(recipe_folder, exist_ok=True)
            # Common filename stem for this (recipe, dataset) pair.
            stem = f"recipe_{recipe_dict['recipe_id']}_data_{key}"
            # Save markdown report.
            with open(os.path.join(recipe_folder, f"{stem}_output.md"), 'w') as f:
                f.write(markdown_output)
            # Save the gold-standard and generated JSON side files.
            with open(os.path.join(recipe_folder, f"{stem}_gold_standard.json"), 'w') as f:
                json.dump(gold_standard_json, f, indent=2)
            with open(os.path.join(recipe_folder, f"{stem}_generated.json"), 'w') as f:
                json.dump(comparison_json, f, indent=2)
            # Save the differences (the original wrote the file's own path here).
            with open(os.path.join(recipe_folder, f"{stem}_differences.json"), 'w') as f:
                json.dump(json_diff, f, indent=2)
    print("ZIPPING UP WHOLE THING")
    # Zip the entire output folder; make_archive returns the archive path.
    zip_filename = shutil.make_archive(output_folder, 'zip', output_folder)
    # Cleanup: remove the unzipped folder after zipping it.
    shutil.rmtree(output_folder)
    # Return the zip file for downloading.
    return zip_filename