DJHumanRPT committed on
Commit
136fb03
·
verified ·
1 Parent(s): e25ab06

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +222 -21
app.py CHANGED
@@ -125,9 +125,7 @@ def call_llm(prompt, model="gpt-3.5-turbo"):
125
  if st.session_state.show_template_editor and st.session_state.template_spec:
126
  output_vars = st.session_state.template_spec.get("output", [])
127
  if output_vars:
128
- output_specs = (
129
- "Please generate output with the following specifications:\n"
130
- )
131
  for var in output_vars:
132
  output_specs += (
133
  f"- {var['name']}: {var['description']} (Type: {var['type']})"
@@ -137,15 +135,49 @@ def call_llm(prompt, model="gpt-3.5-turbo"):
137
  output_specs += "\n"
138
 
139
  # Add the output specs to the prompt
140
- prompt = f"{prompt}\n\n{output_specs}"
141
 
142
  response = client.chat.completions.create(
143
  model=model,
144
  messages=[{"role": "user", "content": prompt}],
145
  max_tokens=1000,
146
- temperature=0.7,
147
  )
148
- return response.choices[0].message.content
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
149
  except Exception as e:
150
  st.error(f"Error calling LLM API: {str(e)}")
151
  return f"Error: {str(e)}"
@@ -200,6 +232,8 @@ Generate a JSON template specification with the following structure:
200
  }}
201
 
202
  Make sure the prompt includes all input variables and is designed to produce the expected outputs.
 
 
203
  If a 'lore' or 'knowledge_base' should be incorporated, include {{lore}} in the prompt template.
204
  If document content was provided, design the template to effectively use that information.
205
  """
@@ -1463,15 +1497,31 @@ with tab3:
1463
  if st.session_state.generated_output:
1464
  st.header("Generated Output")
1465
  st.markdown("### Result")
1466
- st.write(st.session_state.generated_output)
1467
 
1468
- # Option to save the output
1469
- st.download_button(
1470
- label="Download Output",
1471
- data=st.session_state.generated_output,
1472
- file_name="generated_output.txt",
1473
- mime="text/plain",
1474
- )
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1475
  else:
1476
  st.info(
1477
  "No template has been generated yet. Go to the 'Setup' tab to create one."
@@ -1511,6 +1561,10 @@ with tab4:
1511
  st.session_state.combined_data = []
1512
  if "show_json_columns" not in st.session_state:
1513
  st.session_state.show_json_columns = False
 
 
 
 
1514
 
1515
  # Generate inputs button
1516
  if st.button("Generate Synthetic Inputs"):
@@ -1528,6 +1582,12 @@ with tab4:
1528
  st.success(
1529
  f"Generated {len(st.session_state.synthetic_inputs)} input samples"
1530
  )
 
 
 
 
 
 
1531
 
1532
  # Display generated inputs if available
1533
  if st.session_state.synthetic_inputs:
@@ -1546,21 +1606,162 @@ with tab4:
1546
  mime="text/csv",
1547
  )
1548
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1549
  # Generate outputs button
1550
- if st.button("Generate Outputs for These Inputs"):
1551
  if not st.session_state.get("api_key"):
1552
  st.error("Please provide an OpenAI API key in the sidebar.")
 
 
1553
  else:
1554
- with st.spinner("Generating outputs for each input..."):
1555
- st.session_state.combined_data = generate_synthetic_outputs(
1556
- st.session_state.template_spec,
1557
- st.session_state.synthetic_inputs,
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1558
  st.session_state.knowledge_base,
1559
  )
1560
 
1561
- if st.session_state.combined_data:
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1562
  st.success(
1563
- f"Generated outputs for {len(st.session_state.combined_data)} inputs"
1564
  )
1565
 
1566
  # Display combined data if available
 
125
  if st.session_state.show_template_editor and st.session_state.template_spec:
126
  output_vars = st.session_state.template_spec.get("output", [])
127
  if output_vars:
128
+ output_specs = "Please generate output with the following specifications in JSON format:\n"
 
 
129
  for var in output_vars:
130
  output_specs += (
131
  f"- {var['name']}: {var['description']} (Type: {var['type']})"
 
135
  output_specs += "\n"
136
 
137
  # Add the output specs to the prompt
138
+ prompt = f"{prompt}\n\n{output_specs}\n\nReturn ONLY a JSON object with the output variables, with no additional text or explanation."
139
 
140
  response = client.chat.completions.create(
141
  model=model,
142
  messages=[{"role": "user", "content": prompt}],
143
  max_tokens=1000,
144
+ temperature=st.session_state.get("temperature", 0.7),
145
  )
146
+
147
+ result = response.choices[0].message.content
148
+
149
+ # Try to parse as JSON if the template has output variables
150
+ if (
151
+ st.session_state.show_template_editor
152
+ and st.session_state.template_spec
153
+ and st.session_state.template_spec.get("output")
154
+ ):
155
+ # Extract JSON from the response
156
+ json_pattern = r"```json\s*([\s\S]*?)\s*```|^\s*\{[\s\S]*\}\s*$"
157
+ json_match = re.search(json_pattern, result)
158
+
159
+ if json_match:
160
+ json_str = json_match.group(1) if json_match.group(1) else result
161
+ # Clean up any remaining markdown or comments
162
+ json_str = re.sub(r"```.*|```", "", json_str).strip()
163
+ try:
164
+ output_data = json.loads(json_str)
165
+ # Store the parsed JSON in session state for proper rendering
166
+ st.session_state.json_output = output_data
167
+ return output_data
168
+ except:
169
+ pass
170
+ else:
171
+ try:
172
+ output_data = json.loads(result)
173
+ # Store the parsed JSON in session state for proper rendering
174
+ st.session_state.json_output = output_data
175
+ return output_data
176
+ except:
177
+ pass
178
+
179
+ # If we couldn't parse as JSON or it's not meant to be JSON, return as is
180
+ return result
181
  except Exception as e:
182
  st.error(f"Error calling LLM API: {str(e)}")
183
  return f"Error: {str(e)}"
 
232
  }}
233
 
234
  Make sure the prompt includes all input variables and is designed to produce the expected outputs.
235
+ The prompt should address an LLM as if it was a combination of a system prompt and user input, and must contain information around formatting,
236
+ structure and context for the LLM to generate the desired content as derived from these instructions and/or documents.
237
  If a 'lore' or 'knowledge_base' should be incorporated, include {{lore}} in the prompt template.
238
  If document content was provided, design the template to effectively use that information.
239
  """
 
1497
  if st.session_state.generated_output:
1498
  st.header("Generated Output")
1499
  st.markdown("### Result")
 
1500
 
1501
+ # Check if the output is a dictionary (JSON)
1502
+ if isinstance(st.session_state.generated_output, dict):
1503
+ # Display as JSON
1504
+ st.json(st.session_state.generated_output)
1505
+
1506
+ # Option to save the output as JSON
1507
+ output_json = json.dumps(st.session_state.generated_output, indent=2)
1508
+ st.download_button(
1509
+ label="Download Output (JSON)",
1510
+ data=output_json,
1511
+ file_name="generated_output.json",
1512
+ mime="application/json",
1513
+ )
1514
+ else:
1515
+ # Display as text
1516
+ st.write(st.session_state.generated_output)
1517
+
1518
+ # Option to save the output as text
1519
+ st.download_button(
1520
+ label="Download Output",
1521
+ data=str(st.session_state.generated_output),
1522
+ file_name="generated_output.txt",
1523
+ mime="text/plain",
1524
+ )
1525
  else:
1526
  st.info(
1527
  "No template has been generated yet. Go to the 'Setup' tab to create one."
 
1561
  st.session_state.combined_data = []
1562
  if "show_json_columns" not in st.session_state:
1563
  st.session_state.show_json_columns = False
1564
+ if "modified_prompt_template" not in st.session_state:
1565
+ st.session_state.modified_prompt_template = ""
1566
+ if "selected_samples" not in st.session_state:
1567
+ st.session_state.selected_samples = []
1568
 
1569
  # Generate inputs button
1570
  if st.button("Generate Synthetic Inputs"):
 
1582
  st.success(
1583
  f"Generated {len(st.session_state.synthetic_inputs)} input samples"
1584
  )
1585
+ # Reset selected samples when new inputs are generated
1586
+ st.session_state.selected_samples = []
1587
+ # Reset modified prompt when new inputs are generated
1588
+ st.session_state.modified_prompt_template = (
1589
+ st.session_state.template_spec["prompt"]
1590
+ )
1591
 
1592
  # Display generated inputs if available
1593
  if st.session_state.synthetic_inputs:
 
1606
  mime="text/csv",
1607
  )
1608
 
1609
+ # Sample selection for output generation
1610
+ st.subheader("Generate Outputs")
1611
+
1612
+ # Initialize the modified prompt template if not already done
1613
+ if not st.session_state.modified_prompt_template:
1614
+ st.session_state.modified_prompt_template = (
1615
+ st.session_state.template_spec["prompt"]
1616
+ )
1617
+
1618
+ # Allow editing the prompt template
1619
+ with st.expander("View/Edit Prompt Template", expanded=False):
1620
+ st.info(
1621
+ "You can modify the prompt template used for generating outputs. Use {variable_name} to refer to input variables."
1622
+ )
1623
+
1624
+ st.session_state.modified_prompt_template = st.text_area(
1625
+ "Prompt Template",
1626
+ value=st.session_state.modified_prompt_template,
1627
+ height=200,
1628
+ )
1629
+
1630
+ # Button to reset to original template
1631
+ if st.button("Reset to Original Template"):
1632
+ st.session_state.modified_prompt_template = (
1633
+ st.session_state.template_spec["prompt"]
1634
+ )
1635
+ st.success("Prompt template reset to original")
1636
+
1637
+ # Sample selection options
1638
+ selection_method = st.radio(
1639
+ "Select samples for output generation",
1640
+ options=["Generate for all samples", "Select specific samples"],
1641
+ index=0,
1642
+ )
1643
+
1644
+ if selection_method == "Select specific samples":
1645
+ # Create a list of sample indices for selection
1646
+ sample_options = [
1647
+ f"Sample {i+1}"
1648
+ for i in range(len(st.session_state.synthetic_inputs))
1649
+ ]
1650
+
1651
+ # Allow multi-selection of samples
1652
+ selected_indices = st.multiselect(
1653
+ "Select samples to generate outputs for",
1654
+ options=range(len(sample_options)),
1655
+ format_func=lambda i: sample_options[i],
1656
+ )
1657
+
1658
+ # Store selected samples
1659
+ st.session_state.selected_samples = selected_indices
1660
+
1661
+ # Preview selected samples
1662
+ if selected_indices:
1663
+ st.write(f"Selected {len(selected_indices)} samples:")
1664
+ selected_df = pd.DataFrame(
1665
+ [st.session_state.synthetic_inputs[i] for i in selected_indices]
1666
+ )
1667
+ st.dataframe(selected_df)
1668
+ else:
1669
+ # Use all samples
1670
+ st.session_state.selected_samples = list(
1671
+ range(len(st.session_state.synthetic_inputs))
1672
+ )
1673
+
1674
+ # Preview the prompt for a selected sample
1675
+ if st.session_state.selected_samples:
1676
+ with st.expander("Preview Prompt for Sample", expanded=False):
1677
+ # Let user select which sample to preview
1678
+ preview_index = st.selectbox(
1679
+ "Select a sample to preview prompt",
1680
+ options=st.session_state.selected_samples,
1681
+ format_func=lambda i: f"Sample {i+1}",
1682
+ )
1683
+
1684
+ # Get the selected sample
1685
+ sample = st.session_state.synthetic_inputs[preview_index]
1686
+
1687
+ # Fill the prompt template with sample values
1688
+ filled_prompt = st.session_state.modified_prompt_template
1689
+ for var_name, var_value in sample.items():
1690
+ filled_prompt = filled_prompt.replace(
1691
+ f"{{{var_name}}}", str(var_value)
1692
+ )
1693
+
1694
+ # Replace {lore} with knowledge base if present
1695
+ if "{lore}" in filled_prompt:
1696
+ filled_prompt = filled_prompt.replace(
1697
+ "{lore}", st.session_state.knowledge_base
1698
+ )
1699
+
1700
+ # Show the filled prompt
1701
+ st.text_area(
1702
+ "Filled Prompt", value=filled_prompt, height=300, disabled=True
1703
+ )
1704
+
1705
  # Generate outputs button
1706
+ if st.button("Generate Outputs for Selected Samples"):
1707
  if not st.session_state.get("api_key"):
1708
  st.error("Please provide an OpenAI API key in the sidebar.")
1709
+ elif not st.session_state.selected_samples:
1710
+ st.error("No samples selected for output generation.")
1711
  else:
1712
+ # Create a copy of the template spec with the modified prompt
1713
+ modified_template = st.session_state.template_spec.copy()
1714
+ modified_template["prompt"] = (
1715
+ st.session_state.modified_prompt_template
1716
+ )
1717
+
1718
+ # Get only the selected samples
1719
+ selected_inputs = [
1720
+ st.session_state.synthetic_inputs[i]
1721
+ for i in st.session_state.selected_samples
1722
+ ]
1723
+
1724
+ with st.spinner(
1725
+ f"Generating outputs for {len(selected_inputs)} samples..."
1726
+ ):
1727
+ generated_outputs = generate_synthetic_outputs(
1728
+ modified_template,
1729
+ selected_inputs,
1730
  st.session_state.knowledge_base,
1731
  )
1732
 
1733
+ if generated_outputs:
1734
+ # If we're generating for all samples, replace the combined data
1735
+ if selection_method == "Generate for all samples":
1736
+ st.session_state.combined_data = generated_outputs
1737
+ else:
1738
+ # If we're generating for specific samples, update only those samples
1739
+ # First, ensure combined_data exists and has the right size
1740
+ if not st.session_state.combined_data or len(
1741
+ st.session_state.combined_data
1742
+ ) != len(st.session_state.synthetic_inputs):
1743
+ st.session_state.combined_data = [None] * len(
1744
+ st.session_state.synthetic_inputs
1745
+ )
1746
+
1747
+ # Update only the selected samples
1748
+ for i, output_idx in enumerate(
1749
+ st.session_state.selected_samples
1750
+ ):
1751
+ if i < len(generated_outputs):
1752
+ st.session_state.combined_data[output_idx] = (
1753
+ generated_outputs[i]
1754
+ )
1755
+
1756
+ # Remove any None values (samples that haven't been generated yet)
1757
+ st.session_state.combined_data = [
1758
+ item
1759
+ for item in st.session_state.combined_data
1760
+ if item is not None
1761
+ ]
1762
+
1763
  st.success(
1764
+ f"Generated outputs for {len(generated_outputs)} samples"
1765
  )
1766
 
1767
  # Display combined data if available