Update app.py
Browse files
app.py
CHANGED
|
@@ -20,11 +20,6 @@ login(token=hf_token)
|
|
| 20 |
|
| 21 |
# SmolAgent initialization
|
| 22 |
model = HfApiModel("mistralai/Mixtral-8x7B-Instruct-v0.1", token=hf_token)
|
| 23 |
-
agent = CodeAgent(
|
| 24 |
-
tools=[],
|
| 25 |
-
model=model,
|
| 26 |
-
additional_authorized_imports=["numpy", "pandas", "matplotlib.pyplot", "seaborn", "sklearn"]
|
| 27 |
-
)
|
| 28 |
|
| 29 |
df_global = None
|
| 30 |
|
|
@@ -48,41 +43,43 @@ def upload_file(file):
|
|
| 48 |
|
| 49 |
import textwrap
|
| 50 |
|
| 51 |
-
def run_agent(_):
|
| 52 |
-
try:
|
| 53 |
-
# Check if df_global is not None, meaning a file was uploaded and cleaned
|
| 54 |
-
if df_global is None:
|
| 55 |
-
return "Error: No dataset uploaded."
|
| 56 |
-
|
| 57 |
-
# Pass the dataset to the agent and generate insights/visualizations
|
| 58 |
-
prompt = """You are an expert data scientist.
|
| 59 |
-
Please generate 5 data insights and 5 data visualizations for the provided dataset.
|
| 60 |
-
Visualizations should be saved in the current working directory."""
|
| 61 |
-
|
| 62 |
-
# Call the agent's run method, passing the dataset as input
|
| 63 |
-
output = agent.run(
|
| 64 |
-
prompt, # Pass the instructions as a string
|
| 65 |
-
additional_args=dict(dataset=df_global.to_dict(), additional_notes="Additional notes or analysis can go here.")
|
| 66 |
-
)
|
| 67 |
-
|
| 68 |
-
# Print or log the output to inspect it
|
| 69 |
-
print("SmolAgent Output:", output)
|
| 70 |
-
|
| 71 |
-
# Clean and fix indentation issues
|
| 72 |
-
if isinstance(output, str):
|
| 73 |
-
cleaned_output = textwrap.dedent(output) # This will remove leading indentation
|
| 74 |
-
exec(cleaned_output)
|
| 75 |
-
return "Insights and visualizations have been generated successfully."
|
| 76 |
-
else:
|
| 77 |
-
return f"Unexpected output format: {type(output)}. The output is not a string."
|
| 78 |
|
| 79 |
-
|
| 80 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 81 |
|
| 82 |
|
| 83 |
def train_model(_):
|
| 84 |
wandb.login(key=os.environ.get("WANDB_API_KEY"))
|
| 85 |
-
wandb_run = wandb.init(project="huggingface-data-analysis", name="Optuna_Run", reinit=True)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 86 |
|
| 87 |
target = df_global.columns[-1]
|
| 88 |
X = df_global.drop(target, axis=1)
|
|
|
|
| 20 |
|
| 21 |
# SmolAgent initialization
|
| 22 |
model = HfApiModel("mistralai/Mixtral-8x7B-Instruct-v0.1", token=hf_token)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 23 |
|
| 24 |
df_global = None
|
| 25 |
|
|
|
|
| 43 |
|
| 44 |
import textwrap
|
| 45 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 46 |
|
| 47 |
+
# Initialize the agent
|
| 48 |
+
agent = CodeAgent(
|
| 49 |
+
tools=[],
|
| 50 |
+
model=model,
|
| 51 |
+
additional_authorized_imports=["numpy", "pandas", "matplotlib.pyplot", "seaborn"]
|
| 52 |
+
)
|
| 53 |
+
|
| 54 |
+
# Run the agent with the analysis instructions
|
| 55 |
+
analysis_result = agent.run(
|
| 56 |
+
"""
|
| 57 |
+
You are an expert data analyst. Perform comprehensive analysis including:
|
| 58 |
+
1. Basic statistics and data quality checks.
|
| 59 |
+
2. Three insightful analytical questions about relationships in the data.
|
| 60 |
+
3. Visualization of key patterns and correlations.
|
| 61 |
+
4. Actionable real-world insights derived from findings.
|
| 62 |
+
Generate publication-quality visualizations and save them to './figures/'.
|
| 63 |
+
""",
|
| 64 |
+
additional_args={
|
| 65 |
+
"additional_notes": additional_notes, # Any additional analysis notes
|
| 66 |
+
"source_file": csv_file # The path to the input CSV file
|
| 67 |
+
}
|
| 68 |
+
)
|
| 69 |
+
|
| 70 |
+
# Optionally, you can inspect the results
|
| 71 |
+
print(analysis_result)
|
| 72 |
|
| 73 |
|
| 74 |
def train_model(_):
|
| 75 |
wandb.login(key=os.environ.get("WANDB_API_KEY"))
|
| 76 |
+
#wandb_run = wandb.init(project="huggingface-data-analysis", name="Optuna_Run", reinit=True)
|
| 77 |
+
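# NOTE(review): this counter appears to be assigned inside train_model, so it would reset to 1 on every call; keep it at module scope if unique run names are intended (confirm)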
|
| 78 |
+
run_counter = 1
|
| 79 |
+
|
| 80 |
+
# Then when initializing
|
| 81 |
+
wandb_run = wandb.init(project="huggingface-data-analysis", name=f"Optuna_Run_{run_counter}", reinit=True)
|
| 82 |
+
run_counter += 1
|
| 83 |
|
| 84 |
target = df_global.columns[-1]
|
| 85 |
X = df_global.drop(target, axis=1)
|