# AutoStartup.ai / app.py
# Sami Halawa
# Configure AutoStartup.ai to use Gemini API
# efcf330
# -*- coding: utf-8 -*-
import gradio as gr
import json
from utils import parse_evaluation_json
from ai_scientist.generate_ideas import generate_temp_free_idea
from generator import IdeaGenerator
import time
import xlsxwriter
import pandas as pd
import os
from openai import OpenAI
# Pre-filled value of the "Idea Number" field in the UI.
DEFAULT_IDEAS_COUNT = 10
# Pre-filled value of the "Seed" field in the UI.
DEFAULT_RANDOM_SEED = 42
# JSON file that generate_and_evaluate overwrites with the ranked ideas of the latest run.
OUTPUT_FILE = 'ranked_ideas_output.json'
# JSONL log that like_idea appends one idea record to per click.
GOOD_IDEAS_FILE = 'good_ideas.jsonl'
# JSONL log that dislike_idea appends one idea record to per click.
BAD_IDEAS_FILE = 'bad_ideas.jsonl'
# NOTE(review): not referenced in the visible part of this file; presumably a delay in seconds - confirm.
SLEEP_TIME = 2
# Build the shared IdeaGenerator once at import time.
# On failure the module still imports: `generator` stays None and the reason is
# kept in `initialization_error` so the UI can show it instead of crashing.
initialization_error = None
generator = None
try:
    generator = IdeaGenerator()
    print("✅ IdeaGenerator initialized successfully")
except Exception as e:  # top-level boundary: any construction failure is reported, not raised
    # `generator` was never rebound because the constructor raised, so it is still None here.
    initialization_error = f"IdeaGenerator initialization failed: {e}"
    print(f"⚠️ {initialization_error}")
def _as_text(value, default='N/A'):
    """Return ``value`` as display text so it can be concatenated safely.

    Strings pass through unchanged, ``None`` becomes ``default``, dicts and
    lists are rendered as indented JSON, anything else goes through ``str``.
    """
    if value is None:
        return default
    if isinstance(value, str):
        return value
    if isinstance(value, (dict, list)):
        return json.dumps(value, indent=4, ensure_ascii=False)
    return str(value)


def _source_name(idea_info):
    """Return the last '/'-separated segment of ``paper_title``, or 'N/A'.

    The fallback is returned directly rather than being split: splitting the
    literal 'N/A' on '/' would yield 'A'.
    """
    title = idea_info.get('paper_title')
    if not title:
        return 'N/A'
    return str(title).split('/')[-1]


def _display_score(idea_info):
    """Return ``combined_score`` when it is numeric, otherwise 'N/A'."""
    score = idea_info.get('combined_score', 'N/A')
    return score if isinstance(score, (int, float)) else 'N/A'


def _to_results_frame(rows):
    """Build the results DataFrame and normalise its "Score" column.

    Scores are coerced to numbers; values that cannot be coerced are shown
    as the string 'N/A'.
    """
    frame = pd.DataFrame(rows)
    if "Score" in frame.columns:
        numeric_scores = pd.to_numeric(frame["Score"], errors='coerce')
        frame["Score"] = numeric_scores.apply(lambda x: x if pd.notna(x) else 'N/A')
    return frame


def generate_and_evaluate(query, ideas_count, random_seed, progress=gr.Progress()):
    """Generate, evaluate and rank ideas for ``query``, streaming UI updates.

    This is a generator. Every yielded item is a 5-tuple matching the outputs
    wired to the submit button: (status text, ranked-ideas DataFrame,
    cost string, ranked-ideas list, comparison DataFrame).

    Steps:
      1. Bail out with an error status if the module-level generator is None.
      2. Seed the generator, fetch the paper list, generate and rank ideas,
         and dump them to ``OUTPUT_FILE``.
      3. Yield the ranked table.
      4. Run the alternative generation path, evaluate its ideas with the same
         generator, dump them to 'comparison_ranked_ideas.json', and yield
         again with the comparison table and the combined cost.

    Args:
        query: Problem statement; also stored on every ranked idea as 'query'.
        ideas_count: Number of ideas requested; a quarter of it (floor
            division) is passed to the alternative path.
        random_seed: Seed forwarded to ``generator.set_seed``.
        progress: Gradio progress tracker forwarded to the generator calls.
    """
    if generator is None:
        yield "❌ Generator not initialized. Please check your API keys.", pd.DataFrame(), "$0.00", [], pd.DataFrame()
        return

    generator.set_seed(random_seed)
    start_time = time.time()
    yield "Loading papers...", pd.DataFrame(), "$0.00", None, pd.DataFrame()

    papers_to_process = generator.get_paper_list(ideas_count)
    progress(0, desc="Generating ideas...")
    ranked_ideas, total_cost = generator.generate_and_evaluate(papers_to_process, query, progress)
    total_time = time.time() - start_time
    print(f"Total cost: {total_cost:.2f}, Total time: {total_time:.2f}s")

    for idea_info in ranked_ideas:
        idea_info['query'] = query
    with open(OUTPUT_FILE, 'w', encoding='utf-8') as f:
        json.dump(ranked_ideas, f, indent=4, ensure_ascii=False)
    print(f"ideas are saved into {OUTPUT_FILE}")

    # --- Primary results table ---
    display_data = []
    for rank, idea_info in enumerate(ranked_ideas, start=1):
        # Fields go through _as_text so a missing or non-string value cannot
        # break the concatenation.
        idea_description = "\n\n".join([
            "THOUGHT:\n" + _as_text(idea_info.get('Thought')),
            "TITLE:\n" + _as_text(idea_info.get('Title')),
            "TDLR:\n" + _as_text(idea_info.get('TDLR')),
            "MOTIVATION:\n" + _as_text(idea_info.get('Motivation')),
            "IDEA:\n" + _as_text(idea_info.get('Idea')),
            "EXPERIMENT:\n" + _as_text(idea_info.get('Experiment')),
        ])
        display_data.append({
            "Rank": rank,
            "Startup Idea": idea_description,
            "Score": _display_score(idea_info),
            "Market Research Source": _source_name(idea_info),
            "Business Details": idea_info.get('evaluation_raw', 'N/A'),
        })
    results_df = _to_results_frame(display_data)

    status_message = f"Done. Processed {len(ranked_ideas)} ideas in {total_time:.2f}s."
    yield status_message, results_df, f"${total_cost:.2f}", ranked_ideas, pd.DataFrame()

    # --- Alternative generation path, shown in the comparison table ---
    # The OpenAI client is pointed at the Gemini OpenAI-compatible endpoint
    # unless OPENAI_API_ENDPOINT overrides it; the key is read from OPENAI_API_KEY.
    gemini_client = OpenAI(
        api_key=os.getenv("OPENAI_API_KEY"),
        base_url=os.getenv("OPENAI_API_ENDPOINT", "https://generativelanguage.googleapis.com/v1beta/openai/")
    )
    # NOTE(review): ideas_count // 4 is 0 for fewer than 4 requested ideas -
    # confirm how generate_temp_free_idea treats a zero count.
    ideas = generate_temp_free_idea('ai_scientist/test.json', gemini_client, 'gemini-2.0-flash-exp', query, ideas_count//4, 4, False)
    comparison_ranked_ideas, cost = generator.evaluate(ideas, query, progress)
    with open('comparison_ranked_ideas.json', 'w', encoding='utf-8') as f:
        json.dump(comparison_ranked_ideas, f, indent=4, ensure_ascii=False)

    comparison_display_data = []
    for rank, idea_info in enumerate(comparison_ranked_ideas, start=1):
        idea_description = "\n\n".join([
            'TITLE:\n' + _as_text(idea_info.get('Title')),
            "Short Hypothesis:\n" + _as_text(idea_info.get('Short Hypothesis')),
            "Abstract:\n" + _as_text(idea_info.get('Abstract')),
        ])
        experiment_text = idea_info.get('Experiments', 'N/A')
        if isinstance(experiment_text, dict):
            experiment_text = json.dumps(experiment_text, indent=4, ensure_ascii=False)
        comparison_display_data.append({
            "Rank": rank,
            "Startup Idea": idea_description,
            "Score": _display_score(idea_info),
            "Market Research Source": _source_name(idea_info),
            "Business Details": experiment_text,
        })
    comparison_results_df = _to_results_frame(comparison_display_data)

    total_cost += cost
    progress(1.0)
    yield status_message, results_df, f"${total_cost:.2f}", ranked_ideas, comparison_results_df
def like_idea(row_number, ranked_ideas):
    """Record the idea at 1-based rank ``row_number`` as viable.

    The selected idea is appended as one JSON line to ``GOOD_IDEAS_FILE``.

    Args:
        row_number: 1-based rank taken from the "rank" number field.
        ranked_ideas: List of idea dicts held in the UI state; may be None
            or empty before any run has finished.

    Returns:
        A status string: "invalid rank" when the rank cannot be resolved to
        an entry of ``ranked_ideas``, otherwise a confirmation message.
    """
    try:
        idx = int(row_number) - 1
    except (TypeError, ValueError, OverflowError):
        # A non-numeric rank (e.g. None, presumably what an emptied number
        # field sends) is reported the same way as an out-of-range one
        # instead of raising inside the click handler.
        return "invalid rank"
    if not ranked_ideas or not 0 <= idx < len(ranked_ideas):
        return "invalid rank"
    idea_info = ranked_ideas[idx]
    with open(GOOD_IDEAS_FILE, "a", encoding="utf-8") as f:
        f.write(json.dumps(idea_info, ensure_ascii=False) + "\n")
    return f"the {row_number}-th startup idea has been recorded as viable"
def dislike_idea(row_number, ranked_ideas):
    """Record the idea at 1-based rank ``row_number`` as not viable.

    The selected idea is appended as one JSON line to ``BAD_IDEAS_FILE``.

    Args:
        row_number: 1-based rank taken from the "rank" number field.
        ranked_ideas: List of idea dicts held in the UI state; may be None
            or empty before any run has finished.

    Returns:
        A status string: "invalid rank" when the rank cannot be resolved to
        an entry of ``ranked_ideas``, otherwise a confirmation message.
    """
    try:
        idx = int(row_number) - 1
    except (TypeError, ValueError, OverflowError):
        # A non-numeric rank (e.g. None, presumably what an emptied number
        # field sends) is reported the same way as an out-of-range one
        # instead of raising inside the click handler.
        return "invalid rank"
    if not ranked_ideas or not 0 <= idx < len(ranked_ideas):
        return "invalid rank"
    idea_info = ranked_ideas[idx]
    with open(BAD_IDEAS_FILE, "a", encoding="utf-8") as f:
        f.write(json.dumps(idea_info, ensure_ascii=False) + "\n")
    return f"the {row_number}-th startup idea has been recorded as not viable"
def export_xlsx(ranked_ideas):
    """Write the ranked ideas to a timestamped .xlsx file and return its path.

    Args:
        ranked_ideas: List of idea dicts held in the UI state; may be None
            or empty before any run has finished.

    Returns:
        The path of the workbook written under ``./idea_arxiv/``, or None
        when there is nothing to export.
    """
    if not ranked_ideas:
        return None

    rows = []
    for rank, idea_info in enumerate(ranked_ideas, start=1):
        # NOTE(review): this reads a lowercase 'idea' entry and splits it on
        # 'NEW IDEA JSON:', whereas the on-screen table reads 'Thought',
        # 'Title', 'Idea', 'Experiment', ... - if records no longer carry
        # 'idea', the "Idea" and "Details" columns fall back to 'N/A'.
        # Confirm the record schema against the generator.
        idea_data = idea_info.get('idea', {})
        if isinstance(idea_data, dict):
            full_idea_text = idea_data.get('idea', 'N/A')
        else:
            full_idea_text = idea_data
        # Coerce before splitting so a non-string payload cannot raise.
        parts = str(full_idea_text).split('NEW IDEA JSON:')
        idea_description = parts[0].strip()
        experiment_text = parts[-1].strip() if len(parts) > 1 else 'N/A'

        # Return the fallback directly: splitting the literal 'N/A' on '/'
        # would produce 'A'.
        paper_title = idea_info.get('paper_title')
        source_paper = str(paper_title).split('/')[-1] if paper_title else 'N/A'

        rows.append({
            "Rank": rank,
            "Idea": idea_description.replace('THOUGHT:', '').strip(),
            "Score": idea_info.get('combined_score', 'N/A'),
            "Source Paper": source_paper,
            "Details": experiment_text,
        })

    df = pd.DataFrame(rows)
    timestamp = time.strftime("%Y%m%d_%H%M%S")
    export_dir = "./idea_arxiv"
    # The writer does not create missing directories, so make sure it exists.
    os.makedirs(export_dir, exist_ok=True)
    filename = f"{export_dir}/ranked_ideas_{timestamp}.xlsx"
    with pd.ExcelWriter(filename, engine="xlsxwriter") as writer:
        df.to_excel(writer, index=False, sheet_name="Ideas")
        ws = writer.sheets["Ideas"]
        for col_num, col in enumerate(df.columns):
            # Fit each column to its longest cell or its header, capped at 40.
            longest_cell = df[col].astype(str).str.len().max()
            max_width = min(40, max(int(longest_cell), len(col)))
            ws.set_column(col_num, col_num, max_width)
    return filename
# --- Gradio Interface ---
# NOTE(review): the nesting of the layout containers below was reconstructed
# from the order of the components - confirm it against the intended layout.
with gr.Blocks(theme=gr.themes.Soft(), css_paths=['./gradio.css']) as demo:
    gr.Markdown(
        """
# 🚀 AutoStartup.ai - Startup Idea Generator 🚀
Enter a market problem or industry focus, and this tool will generate innovative startup ideas
based on market research and trends, then evaluate and rank them based on Market Potential,
Technical Feasibility, Scalability, and Competitive Advantage.
"""
    )

    with gr.Row():
        # Left column: the query, the submit button and the like/dislike controls.
        with gr.Column(scale=2):
            query_input = gr.Textbox(
                label="Market Problem or Industry Focus",
                placeholder='''e.g. Remote work has fundamentally changed how people collaborate, but current tools still suffer from poor user experience, lack of seamless integration, and difficulty maintaining team culture. Many professionals struggle with productivity in distributed teams, effective asynchronous communication, and maintaining work-life balance.
To address challenges in remote work collaboration (such as team productivity, communication gaps, cultural disconnect, etc), you should come up with an innovative and scalable startup idea.''',
                lines=6
            )
            submit_button = gr.Button("🚀 Generate Startup Ideas", variant="primary")
            gr.Markdown("---")
            gr.Markdown("### Like / Dislike")
            with gr.Row():
                row_input = gr.Number(label="rank", value=1, precision=0)
                status_box = gr.Textbox(label="status", interactive=False)
                with gr.Column(scale=1):
                    like_button = gr.Button("💡 Great Startup Idea!")
                    dislike_button = gr.Button("❌ Not Viable")
        # Right column: run parameters plus cost and status read-outs.
        with gr.Column(scale=1):
            cost_output = gr.Textbox(label="Estimated Cost", interactive=False, value="$0.00")
            ideas_count_input = gr.Number(
                label="Idea Number",
                value=DEFAULT_IDEAS_COUNT,
                precision=0
            )
            seed_input = gr.Number(
                label="Seed",
                value=DEFAULT_RANDOM_SEED,
                precision=0
            )
            status_output = gr.Textbox(label="Status", interactive=False, value="Idle")

    gr.Markdown("---")
    gr.Markdown("## 🏆 Ranked Startup Ideas")
    results_output = gr.DataFrame(
        headers=["Rank", "Startup Idea", "Score", "Market Research Source", "Business Details"],
        datatype=["number", "str", "number", "str", "str"],
        label="Market-Validated Ideas",
        elem_id="results-dataframe",
        row_count=(10, "dynamic"),
        wrap=True
    )

    gr.Markdown("---")
    gr.Markdown("## 🔄 Alternative Generation Results")
    # Second table, filled by the last yield of generate_and_evaluate.
    comparison_results_output = gr.DataFrame(
        headers=["Rank", "Startup Idea", "Score", "Market Research Source", "Business Details"],
        datatype=["number", "str", "number", "str", "str"],
        label="Alternative Approach Results",
        elem_id="comparison-results-dataframe",
        row_count=(10, "dynamic"),
        wrap=True
    )

    # Holds the ranked-ideas list between callbacks: written by
    # generate_and_evaluate, read by like_idea, dislike_idea and export_xlsx.
    results_state = gr.State()
    download_button = gr.DownloadButton(
        label="📊 Download Business Plan Template",
        value=export_xlsx,
        inputs=[results_state]
    )

    # The output order must match the 5-tuples yielded by generate_and_evaluate.
    submit_button.click(
        fn=generate_and_evaluate,
        inputs=[query_input, ideas_count_input, seed_input],
        outputs=[status_output, results_output, cost_output, results_state, comparison_results_output]
    )
    like_button.click(
        fn=like_idea,
        inputs=[row_input, results_state],
        outputs=status_box
    )
    dislike_button.click(
        fn=dislike_idea,
        inputs=[row_input, results_state],
        outputs=status_box
    )

    # Surface a failed generator start-up in the page itself.
    if initialization_error is not None:
        gr.Markdown(
            f"""
<div style='color: red; font-weight: bold; border: 1px solid red; padding: 10px; border-radius: 5px;'>
⚠️ Initialization Error: {initialization_error}
</div>
"""
        )

if __name__ == "__main__":
    demo.launch()