Spaces:
Running
Running
Update app.py
Browse files
app.py
CHANGED
|
@@ -125,9 +125,7 @@ def call_llm(prompt, model="gpt-3.5-turbo"):
|
|
| 125 |
if st.session_state.show_template_editor and st.session_state.template_spec:
|
| 126 |
output_vars = st.session_state.template_spec.get("output", [])
|
| 127 |
if output_vars:
|
| 128 |
-
output_specs =
|
| 129 |
-
"Please generate output with the following specifications:\n"
|
| 130 |
-
)
|
| 131 |
for var in output_vars:
|
| 132 |
output_specs += (
|
| 133 |
f"- {var['name']}: {var['description']} (Type: {var['type']})"
|
|
@@ -137,15 +135,49 @@ def call_llm(prompt, model="gpt-3.5-turbo"):
|
|
| 137 |
output_specs += "\n"
|
| 138 |
|
| 139 |
# Add the output specs to the prompt
|
| 140 |
-
prompt = f"{prompt}\n\n{output_specs}"
|
| 141 |
|
| 142 |
response = client.chat.completions.create(
|
| 143 |
model=model,
|
| 144 |
messages=[{"role": "user", "content": prompt}],
|
| 145 |
max_tokens=1000,
|
| 146 |
-
temperature=0.7,
|
| 147 |
)
|
| 148 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 149 |
except Exception as e:
|
| 150 |
st.error(f"Error calling LLM API: {str(e)}")
|
| 151 |
return f"Error: {str(e)}"
|
|
@@ -200,6 +232,8 @@ Generate a JSON template specification with the following structure:
|
|
| 200 |
}}
|
| 201 |
|
| 202 |
Make sure the prompt includes all input variables and is designed to produce the expected outputs.
|
|
|
|
|
|
|
| 203 |
If a 'lore' or 'knowledge_base' should be incorporated, include {{lore}} in the prompt template.
|
| 204 |
If document content was provided, design the template to effectively use that information.
|
| 205 |
"""
|
|
@@ -1463,15 +1497,31 @@ with tab3:
|
|
| 1463 |
if st.session_state.generated_output:
|
| 1464 |
st.header("Generated Output")
|
| 1465 |
st.markdown("### Result")
|
| 1466 |
-
st.write(st.session_state.generated_output)
|
| 1467 |
|
| 1468 |
-
#
|
| 1469 |
-
st.
|
| 1470 |
-
|
| 1471 |
-
|
| 1472 |
-
|
| 1473 |
-
|
| 1474 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1475 |
else:
|
| 1476 |
st.info(
|
| 1477 |
"No template has been generated yet. Go to the 'Setup' tab to create one."
|
|
@@ -1511,6 +1561,10 @@ with tab4:
|
|
| 1511 |
st.session_state.combined_data = []
|
| 1512 |
if "show_json_columns" not in st.session_state:
|
| 1513 |
st.session_state.show_json_columns = False
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1514 |
|
| 1515 |
# Generate inputs button
|
| 1516 |
if st.button("Generate Synthetic Inputs"):
|
|
@@ -1528,6 +1582,12 @@ with tab4:
|
|
| 1528 |
st.success(
|
| 1529 |
f"Generated {len(st.session_state.synthetic_inputs)} input samples"
|
| 1530 |
)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1531 |
|
| 1532 |
# Display generated inputs if available
|
| 1533 |
if st.session_state.synthetic_inputs:
|
|
@@ -1546,21 +1606,162 @@ with tab4:
|
|
| 1546 |
mime="text/csv",
|
| 1547 |
)
|
| 1548 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1549 |
# Generate outputs button
|
| 1550 |
-
if st.button("Generate Outputs for
|
| 1551 |
if not st.session_state.get("api_key"):
|
| 1552 |
st.error("Please provide an OpenAI API key in the sidebar.")
|
|
|
|
|
|
|
| 1553 |
else:
|
| 1554 |
-
|
| 1555 |
-
|
| 1556 |
-
|
| 1557 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1558 |
st.session_state.knowledge_base,
|
| 1559 |
)
|
| 1560 |
|
| 1561 |
-
if
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1562 |
st.success(
|
| 1563 |
-
f"Generated outputs for {len(
|
| 1564 |
)
|
| 1565 |
|
| 1566 |
# Display combined data if available
|
|
|
|
| 125 |
if st.session_state.show_template_editor and st.session_state.template_spec:
|
| 126 |
output_vars = st.session_state.template_spec.get("output", [])
|
| 127 |
if output_vars:
|
| 128 |
+
output_specs = "Please generate output with the following specifications in JSON format:\n"
|
|
|
|
|
|
|
| 129 |
for var in output_vars:
|
| 130 |
output_specs += (
|
| 131 |
f"- {var['name']}: {var['description']} (Type: {var['type']})"
|
|
|
|
| 135 |
output_specs += "\n"
|
| 136 |
|
| 137 |
# Add the output specs to the prompt
|
| 138 |
+
prompt = f"{prompt}\n\n{output_specs}\n\nReturn ONLY a JSON object with the output variables, with no additional text or explanation."
|
| 139 |
|
| 140 |
response = client.chat.completions.create(
|
| 141 |
model=model,
|
| 142 |
messages=[{"role": "user", "content": prompt}],
|
| 143 |
max_tokens=1000,
|
| 144 |
+
temperature=st.session_state.get("temperature", 0.7),
|
| 145 |
)
|
| 146 |
+
|
| 147 |
+
result = response.choices[0].message.content
|
| 148 |
+
|
| 149 |
+
# Try to parse as JSON if the template has output variables
|
| 150 |
+
if (
|
| 151 |
+
st.session_state.show_template_editor
|
| 152 |
+
and st.session_state.template_spec
|
| 153 |
+
and st.session_state.template_spec.get("output")
|
| 154 |
+
):
|
| 155 |
+
# Extract JSON from the response
|
| 156 |
+
json_pattern = r"```json\s*([\s\S]*?)\s*```|^\s*\{[\s\S]*\}\s*$"
|
| 157 |
+
json_match = re.search(json_pattern, result)
|
| 158 |
+
|
| 159 |
+
if json_match:
|
| 160 |
+
json_str = json_match.group(1) if json_match.group(1) else result
|
| 161 |
+
# Clean up any remaining markdown or comments
|
| 162 |
+
json_str = re.sub(r"```.*|```", "", json_str).strip()
|
| 163 |
+
try:
|
| 164 |
+
output_data = json.loads(json_str)
|
| 165 |
+
# Store the parsed JSON in session state for proper rendering
|
| 166 |
+
st.session_state.json_output = output_data
|
| 167 |
+
return output_data
|
| 168 |
+
except:
|
| 169 |
+
pass
|
| 170 |
+
else:
|
| 171 |
+
try:
|
| 172 |
+
output_data = json.loads(result)
|
| 173 |
+
# Store the parsed JSON in session state for proper rendering
|
| 174 |
+
st.session_state.json_output = output_data
|
| 175 |
+
return output_data
|
| 176 |
+
except:
|
| 177 |
+
pass
|
| 178 |
+
|
| 179 |
+
# If we couldn't parse as JSON or it's not meant to be JSON, return as is
|
| 180 |
+
return result
|
| 181 |
except Exception as e:
|
| 182 |
st.error(f"Error calling LLM API: {str(e)}")
|
| 183 |
return f"Error: {str(e)}"
|
|
|
|
| 232 |
}}
|
| 233 |
|
| 234 |
Make sure the prompt includes all input variables and is designed to produce the expected outputs.
|
| 235 |
+
The prompt should address an LLM as if it was a combination of a system prompt and user input, and must contain information around formatting,
|
| 236 |
+
structure and context for the LLM to generate the desired content as derived from these instructions and/or documents.
|
| 237 |
If a 'lore' or 'knowledge_base' should be incorporated, include {{lore}} in the prompt template.
|
| 238 |
If document content was provided, design the template to effectively use that information.
|
| 239 |
"""
|
|
|
|
| 1497 |
if st.session_state.generated_output:
|
| 1498 |
st.header("Generated Output")
|
| 1499 |
st.markdown("### Result")
|
|
|
|
| 1500 |
|
| 1501 |
+
# Check if the output is a dictionary (JSON)
|
| 1502 |
+
if isinstance(st.session_state.generated_output, dict):
|
| 1503 |
+
# Display as JSON
|
| 1504 |
+
st.json(st.session_state.generated_output)
|
| 1505 |
+
|
| 1506 |
+
# Option to save the output as JSON
|
| 1507 |
+
output_json = json.dumps(st.session_state.generated_output, indent=2)
|
| 1508 |
+
st.download_button(
|
| 1509 |
+
label="Download Output (JSON)",
|
| 1510 |
+
data=output_json,
|
| 1511 |
+
file_name="generated_output.json",
|
| 1512 |
+
mime="application/json",
|
| 1513 |
+
)
|
| 1514 |
+
else:
|
| 1515 |
+
# Display as text
|
| 1516 |
+
st.write(st.session_state.generated_output)
|
| 1517 |
+
|
| 1518 |
+
# Option to save the output as text
|
| 1519 |
+
st.download_button(
|
| 1520 |
+
label="Download Output",
|
| 1521 |
+
data=str(st.session_state.generated_output),
|
| 1522 |
+
file_name="generated_output.txt",
|
| 1523 |
+
mime="text/plain",
|
| 1524 |
+
)
|
| 1525 |
else:
|
| 1526 |
st.info(
|
| 1527 |
"No template has been generated yet. Go to the 'Setup' tab to create one."
|
|
|
|
| 1561 |
st.session_state.combined_data = []
|
| 1562 |
if "show_json_columns" not in st.session_state:
|
| 1563 |
st.session_state.show_json_columns = False
|
| 1564 |
+
if "modified_prompt_template" not in st.session_state:
|
| 1565 |
+
st.session_state.modified_prompt_template = ""
|
| 1566 |
+
if "selected_samples" not in st.session_state:
|
| 1567 |
+
st.session_state.selected_samples = []
|
| 1568 |
|
| 1569 |
# Generate inputs button
|
| 1570 |
if st.button("Generate Synthetic Inputs"):
|
|
|
|
| 1582 |
st.success(
|
| 1583 |
f"Generated {len(st.session_state.synthetic_inputs)} input samples"
|
| 1584 |
)
|
| 1585 |
+
# Reset selected samples when new inputs are generated
|
| 1586 |
+
st.session_state.selected_samples = []
|
| 1587 |
+
# Reset modified prompt when new inputs are generated
|
| 1588 |
+
st.session_state.modified_prompt_template = (
|
| 1589 |
+
st.session_state.template_spec["prompt"]
|
| 1590 |
+
)
|
| 1591 |
|
| 1592 |
# Display generated inputs if available
|
| 1593 |
if st.session_state.synthetic_inputs:
|
|
|
|
| 1606 |
mime="text/csv",
|
| 1607 |
)
|
| 1608 |
|
| 1609 |
+
# Sample selection for output generation
|
| 1610 |
+
st.subheader("Generate Outputs")
|
| 1611 |
+
|
| 1612 |
+
# Initialize the modified prompt template if not already done
|
| 1613 |
+
if not st.session_state.modified_prompt_template:
|
| 1614 |
+
st.session_state.modified_prompt_template = (
|
| 1615 |
+
st.session_state.template_spec["prompt"]
|
| 1616 |
+
)
|
| 1617 |
+
|
| 1618 |
+
# Allow editing the prompt template
|
| 1619 |
+
with st.expander("View/Edit Prompt Template", expanded=False):
|
| 1620 |
+
st.info(
|
| 1621 |
+
"You can modify the prompt template used for generating outputs. Use {variable_name} to refer to input variables."
|
| 1622 |
+
)
|
| 1623 |
+
|
| 1624 |
+
st.session_state.modified_prompt_template = st.text_area(
|
| 1625 |
+
"Prompt Template",
|
| 1626 |
+
value=st.session_state.modified_prompt_template,
|
| 1627 |
+
height=200,
|
| 1628 |
+
)
|
| 1629 |
+
|
| 1630 |
+
# Button to reset to original template
|
| 1631 |
+
if st.button("Reset to Original Template"):
|
| 1632 |
+
st.session_state.modified_prompt_template = (
|
| 1633 |
+
st.session_state.template_spec["prompt"]
|
| 1634 |
+
)
|
| 1635 |
+
st.success("Prompt template reset to original")
|
| 1636 |
+
|
| 1637 |
+
# Sample selection options
|
| 1638 |
+
selection_method = st.radio(
|
| 1639 |
+
"Select samples for output generation",
|
| 1640 |
+
options=["Generate for all samples", "Select specific samples"],
|
| 1641 |
+
index=0,
|
| 1642 |
+
)
|
| 1643 |
+
|
| 1644 |
+
if selection_method == "Select specific samples":
|
| 1645 |
+
# Create a list of sample indices for selection
|
| 1646 |
+
sample_options = [
|
| 1647 |
+
f"Sample {i+1}"
|
| 1648 |
+
for i in range(len(st.session_state.synthetic_inputs))
|
| 1649 |
+
]
|
| 1650 |
+
|
| 1651 |
+
# Allow multi-selection of samples
|
| 1652 |
+
selected_indices = st.multiselect(
|
| 1653 |
+
"Select samples to generate outputs for",
|
| 1654 |
+
options=range(len(sample_options)),
|
| 1655 |
+
format_func=lambda i: sample_options[i],
|
| 1656 |
+
)
|
| 1657 |
+
|
| 1658 |
+
# Store selected samples
|
| 1659 |
+
st.session_state.selected_samples = selected_indices
|
| 1660 |
+
|
| 1661 |
+
# Preview selected samples
|
| 1662 |
+
if selected_indices:
|
| 1663 |
+
st.write(f"Selected {len(selected_indices)} samples:")
|
| 1664 |
+
selected_df = pd.DataFrame(
|
| 1665 |
+
[st.session_state.synthetic_inputs[i] for i in selected_indices]
|
| 1666 |
+
)
|
| 1667 |
+
st.dataframe(selected_df)
|
| 1668 |
+
else:
|
| 1669 |
+
# Use all samples
|
| 1670 |
+
st.session_state.selected_samples = list(
|
| 1671 |
+
range(len(st.session_state.synthetic_inputs))
|
| 1672 |
+
)
|
| 1673 |
+
|
| 1674 |
+
# Preview the prompt for a selected sample
|
| 1675 |
+
if st.session_state.selected_samples:
|
| 1676 |
+
with st.expander("Preview Prompt for Sample", expanded=False):
|
| 1677 |
+
# Let user select which sample to preview
|
| 1678 |
+
preview_index = st.selectbox(
|
| 1679 |
+
"Select a sample to preview prompt",
|
| 1680 |
+
options=st.session_state.selected_samples,
|
| 1681 |
+
format_func=lambda i: f"Sample {i+1}",
|
| 1682 |
+
)
|
| 1683 |
+
|
| 1684 |
+
# Get the selected sample
|
| 1685 |
+
sample = st.session_state.synthetic_inputs[preview_index]
|
| 1686 |
+
|
| 1687 |
+
# Fill the prompt template with sample values
|
| 1688 |
+
filled_prompt = st.session_state.modified_prompt_template
|
| 1689 |
+
for var_name, var_value in sample.items():
|
| 1690 |
+
filled_prompt = filled_prompt.replace(
|
| 1691 |
+
f"{{{var_name}}}", str(var_value)
|
| 1692 |
+
)
|
| 1693 |
+
|
| 1694 |
+
# Replace {lore} with knowledge base if present
|
| 1695 |
+
if "{lore}" in filled_prompt:
|
| 1696 |
+
filled_prompt = filled_prompt.replace(
|
| 1697 |
+
"{lore}", st.session_state.knowledge_base
|
| 1698 |
+
)
|
| 1699 |
+
|
| 1700 |
+
# Show the filled prompt
|
| 1701 |
+
st.text_area(
|
| 1702 |
+
"Filled Prompt", value=filled_prompt, height=300, disabled=True
|
| 1703 |
+
)
|
| 1704 |
+
|
| 1705 |
# Generate outputs button
|
| 1706 |
+
if st.button("Generate Outputs for Selected Samples"):
|
| 1707 |
if not st.session_state.get("api_key"):
|
| 1708 |
st.error("Please provide an OpenAI API key in the sidebar.")
|
| 1709 |
+
elif not st.session_state.selected_samples:
|
| 1710 |
+
st.error("No samples selected for output generation.")
|
| 1711 |
else:
|
| 1712 |
+
# Create a copy of the template spec with the modified prompt
|
| 1713 |
+
modified_template = st.session_state.template_spec.copy()
|
| 1714 |
+
modified_template["prompt"] = (
|
| 1715 |
+
st.session_state.modified_prompt_template
|
| 1716 |
+
)
|
| 1717 |
+
|
| 1718 |
+
# Get only the selected samples
|
| 1719 |
+
selected_inputs = [
|
| 1720 |
+
st.session_state.synthetic_inputs[i]
|
| 1721 |
+
for i in st.session_state.selected_samples
|
| 1722 |
+
]
|
| 1723 |
+
|
| 1724 |
+
with st.spinner(
|
| 1725 |
+
f"Generating outputs for {len(selected_inputs)} samples..."
|
| 1726 |
+
):
|
| 1727 |
+
generated_outputs = generate_synthetic_outputs(
|
| 1728 |
+
modified_template,
|
| 1729 |
+
selected_inputs,
|
| 1730 |
st.session_state.knowledge_base,
|
| 1731 |
)
|
| 1732 |
|
| 1733 |
+
if generated_outputs:
|
| 1734 |
+
# If we're generating for all samples, replace the combined data
|
| 1735 |
+
if selection_method == "Generate for all samples":
|
| 1736 |
+
st.session_state.combined_data = generated_outputs
|
| 1737 |
+
else:
|
| 1738 |
+
# If we're generating for specific samples, update only those samples
|
| 1739 |
+
# First, ensure combined_data exists and has the right size
|
| 1740 |
+
if not st.session_state.combined_data or len(
|
| 1741 |
+
st.session_state.combined_data
|
| 1742 |
+
) != len(st.session_state.synthetic_inputs):
|
| 1743 |
+
st.session_state.combined_data = [None] * len(
|
| 1744 |
+
st.session_state.synthetic_inputs
|
| 1745 |
+
)
|
| 1746 |
+
|
| 1747 |
+
# Update only the selected samples
|
| 1748 |
+
for i, output_idx in enumerate(
|
| 1749 |
+
st.session_state.selected_samples
|
| 1750 |
+
):
|
| 1751 |
+
if i < len(generated_outputs):
|
| 1752 |
+
st.session_state.combined_data[output_idx] = (
|
| 1753 |
+
generated_outputs[i]
|
| 1754 |
+
)
|
| 1755 |
+
|
| 1756 |
+
# Remove any None values (samples that haven't been generated yet)
|
| 1757 |
+
st.session_state.combined_data = [
|
| 1758 |
+
item
|
| 1759 |
+
for item in st.session_state.combined_data
|
| 1760 |
+
if item is not None
|
| 1761 |
+
]
|
| 1762 |
+
|
| 1763 |
st.success(
|
| 1764 |
+
f"Generated outputs for {len(generated_outputs)} samples"
|
| 1765 |
)
|
| 1766 |
|
| 1767 |
# Display combined data if available
|