Spaces:
Paused
Paused
Upload folder using huggingface_hub
Browse files- app.py +57 -22
- test_models.py +49 -0
app.py
CHANGED
|
@@ -43,18 +43,24 @@ logger = logging.getLogger("agent-app")
|
|
| 43 |
PLANNING_SYSTEM_PROMPT = """You are an autonomous planning agent assisting in project ideation.
|
| 44 |
Your goal is to help the user refine their idea into a concrete project structure.
|
| 45 |
|
| 46 |
-
### Sequential Thinking
|
| 47 |
-
You MUST
|
| 48 |
-
1.
|
| 49 |
2. Reflect on the feasibility and security of the architecture.
|
| 50 |
3. Reason about potential API integrations and future use cases.
|
| 51 |
4. Iterate on your thoughts until you have a complete vision.
|
| 52 |
|
| 53 |
### Process
|
| 54 |
-
-
|
| 55 |
-
- Continue thinking until you have covered all aspects of the required structured fields
|
| 56 |
-
|
| 57 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 58 |
|
| 59 |
### Guidelines
|
| 60 |
- Be thorough, professional, and visionary.
|
|
@@ -117,7 +123,7 @@ HELMHOLTZ_BASE_URL = "https://api.helmholtz-blablador.fz-juelich.de/v1"
|
|
| 117 |
api_key = os.environ.get("BLABLADOR_API_KEY") or "EMPTY_KEY"
|
| 118 |
|
| 119 |
chat_llm = ChatOpenAI(
|
| 120 |
-
model="alias-
|
| 121 |
base_url=HELMHOLTZ_BASE_URL,
|
| 122 |
api_key=api_key,
|
| 123 |
max_tokens=2048,
|
|
@@ -125,7 +131,7 @@ chat_llm = ChatOpenAI(
|
|
| 125 |
)
|
| 126 |
|
| 127 |
code_llm = ChatOpenAI(
|
| 128 |
-
model="alias-
|
| 129 |
base_url=HELMHOLTZ_BASE_URL,
|
| 130 |
api_key=api_key,
|
| 131 |
max_tokens=1024,
|
|
@@ -141,13 +147,21 @@ fast_llm = ChatOpenAI(
|
|
| 141 |
)
|
| 142 |
|
| 143 |
huge_llm = ChatOpenAI(
|
| 144 |
-
model="alias-
|
| 145 |
base_url=HELMHOLTZ_BASE_URL,
|
| 146 |
api_key=api_key,
|
| 147 |
max_tokens=1024,
|
| 148 |
max_retries=0
|
| 149 |
)
|
| 150 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 151 |
async def ainvoke_with_retry(llm, prompt, max_retries=5, delay=30):
|
| 152 |
"""Wrapper for LLM calls with retry logic to handle intermittent provider errors."""
|
| 153 |
last_error = None
|
|
@@ -786,11 +800,13 @@ async def handle_close_ideate(history, persona="planning"):
|
|
| 786 |
}
|
| 787 |
|
| 788 |
extracted = {}
|
|
|
|
|
|
|
| 789 |
for key, instruction in fields_config.items():
|
| 790 |
try:
|
| 791 |
prompt = f"{instruction}\n\nLOG:\n{chat_text}\n\nRespond ONLY with the extracted content for this field. Do not add conversational filler."
|
| 792 |
-
logger.info(f"Extracting {key}
|
| 793 |
-
response = await ainvoke_with_retry(
|
| 794 |
content = response.content
|
| 795 |
if "<think>" in content:
|
| 796 |
content = content.split("</think>")[-1].strip()
|
|
@@ -1023,6 +1039,10 @@ async def handle_supervisor_nudge(session_id, log_file=""):
|
|
| 1023 |
if not session_id:
|
| 1024 |
return "No session ID."
|
| 1025 |
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1026 |
tools = await get_all_tools()
|
| 1027 |
|
| 1028 |
# Load context from log_file if provided
|
|
@@ -1063,7 +1083,7 @@ async def handle_supervisor_nudge(session_id, log_file=""):
|
|
| 1063 |
"Respond with the EXACT message you want to send to Jules."
|
| 1064 |
)
|
| 1065 |
|
| 1066 |
-
response = await ainvoke_with_retry(
|
| 1067 |
{"role": "system", "content": system_msg},
|
| 1068 |
{"role": "user", "content": prompt}
|
| 1069 |
])
|
|
@@ -1131,8 +1151,8 @@ async def load_github_branches(repo_url, default_owner="JsonLord"):
|
|
| 1131 |
logger.error(f"Error loading branches: {e}")
|
| 1132 |
return gr.update(choices=["main"], value="main")
|
| 1133 |
|
| 1134 |
-
async def handle_jules_comm(
|
| 1135 |
-
if not
|
| 1136 |
return "No repository URL/source provided.", ""
|
| 1137 |
|
| 1138 |
all_tools = await get_all_tools()
|
|
@@ -1144,7 +1164,13 @@ async def handle_jules_comm(repo_url, branch="main", log_file="", hf_prof_fallba
|
|
| 1144 |
]
|
| 1145 |
tools = [t for t in all_tools if t.name in jules_tool_names]
|
| 1146 |
|
| 1147 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1148 |
|
| 1149 |
hf_profile, hf_space, hf_token = hf_prof_fallback, hf_space_fallback, hf_token_fallback
|
| 1150 |
if log_file:
|
|
@@ -1159,7 +1185,9 @@ async def handle_jules_comm(repo_url, branch="main", log_file="", hf_prof_fallba
|
|
| 1159 |
hf_token = get_hf_token(hf_profile) or hf_token_fallback
|
| 1160 |
|
| 1161 |
base_instruction = (
|
| 1162 |
-
"important!:
|
|
|
|
|
|
|
| 1163 |
"The current application should be kept and only slightly changed, improved, expanded by the features described in /jules folder. "
|
| 1164 |
"The tasks files give a starting point, but judge for yourself. Develop tests to run to first see the current application feature, how it is working and make a plan to implement the features requested in /jules folder to develop the current application inside the repo towards the vision described to achieve expansion of the current app with functionalities described in the /jules folder. "
|
| 1165 |
"Adapt your coding implemententation to the coding language used by the project, and try to go with that. Test the full app within this working space.\n\n"
|
|
@@ -1787,8 +1815,9 @@ with gr.Blocks() as demo:
|
|
| 1787 |
async def respond(message, chat_history):
|
| 1788 |
async for updated_history in handle_chat(message, chat_history, persona="planning"):
|
| 1789 |
yield "", updated_history
|
| 1790 |
-
|
| 1791 |
-
|
|
|
|
| 1792 |
plan_back_btn.click(lambda: (gr.update(visible=True), gr.update(visible=False)), None, [planning_chat_col, planning_structured_col])
|
| 1793 |
|
| 1794 |
with gr.Tab("Push Files") as push_files_tab:
|
|
@@ -1880,8 +1909,7 @@ with gr.Blocks() as demo:
|
|
| 1880 |
res, sid = await handle_jules_comm(repo, branch, log_file, hf_prof, hf_space, hf_token)
|
| 1881 |
return res, sid
|
| 1882 |
|
| 1883 |
-
comm_btn.click(start_comm_and_monitor, [repo_input, branch_dropdown, log_dropdown_comm, hf_profile_comm, hf_space_comm, hf_token_comm], [comm_output, mon_session_id])
|
| 1884 |
-
.then(handle_supervisor_nudge, inputs=[mon_session_id, log_dropdown_comm], outputs=[nudge_output])
|
| 1885 |
|
| 1886 |
with gr.Tab("Container Logs"):
|
| 1887 |
log_view = gr.Textbox(label="app.log (last 100 lines)", lines=25, interactive=False)
|
|
@@ -1904,12 +1932,19 @@ with gr.Blocks() as demo:
|
|
| 1904 |
[mentor_chatbot],
|
| 1905 |
[mentor_chat_col, mentor_structured_col, mentor_proj_desc, mentor_tasks_tests, mentor_repos, mentor_expectations, mentor_endpoints, mentor_prof, mentor_space, mentor_token, mentor_status, log_dropdown, log_dropdown_comm]
|
| 1906 |
)
|
|
|
|
|
|
|
|
|
|
| 1907 |
close_btn.click(
|
| 1908 |
planning_close_wrap,
|
| 1909 |
[chatbot],
|
| 1910 |
-
|
| 1911 |
)
|
| 1912 |
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1913 |
# Log Refresh Logic
|
| 1914 |
refresh_logs_btn_1.click(refresh_logs_ui, outputs=[log_dropdown, log_dropdown_comm])
|
| 1915 |
refresh_logs_btn_2.click(refresh_logs_ui, outputs=[log_dropdown, log_dropdown_comm])
|
|
|
|
| 43 |
PLANNING_SYSTEM_PROMPT = """You are an autonomous planning agent assisting in project ideation.
|
| 44 |
Your goal is to help the user refine their idea into a concrete project structure.
|
| 45 |
|
| 46 |
+
### Predefined Pipeline: Sequential Thinking
|
| 47 |
+
You MUST start your response by using the `sequentialthinking` tool to:
|
| 48 |
+
1. Analyze the user's initial idea and break it down into logical components.
|
| 49 |
2. Reflect on the feasibility and security of the architecture.
|
| 50 |
3. Reason about potential API integrations and future use cases.
|
| 51 |
4. Iterate on your thoughts until you have a complete vision.
|
| 52 |
|
| 53 |
### Process
|
| 54 |
+
- You MUST use `sequentialthinking` at the beginning of the session based on the idea given.
|
| 55 |
+
- Continue thinking until you have covered all aspects of the required structured fields:
|
| 56 |
+
1. Project Description (vision, goals, integrations)
|
| 57 |
+
2. Tasks and Tests (following the 7-point template)
|
| 58 |
+
3. GitHub Repos
|
| 59 |
+
4. Functionality Expectations
|
| 60 |
+
5. API Endpoints
|
| 61 |
+
6. Huggingface Deployment Data
|
| 62 |
+
- Once you are finished thinking, provide your final answer to the user.
|
| 63 |
+
- After your final answer, the session will automatically close and populate the structured fields.
|
| 64 |
|
| 65 |
### Guidelines
|
| 66 |
- Be thorough, professional, and visionary.
|
|
|
|
| 123 |
api_key = os.environ.get("BLABLADOR_API_KEY") or "EMPTY_KEY"
|
| 124 |
|
| 125 |
chat_llm = ChatOpenAI(
|
| 126 |
+
model="alias-fast",
|
| 127 |
base_url=HELMHOLTZ_BASE_URL,
|
| 128 |
api_key=api_key,
|
| 129 |
max_tokens=2048,
|
|
|
|
| 131 |
)
|
| 132 |
|
| 133 |
code_llm = ChatOpenAI(
|
| 134 |
+
model="alias-fast",
|
| 135 |
base_url=HELMHOLTZ_BASE_URL,
|
| 136 |
api_key=api_key,
|
| 137 |
max_tokens=1024,
|
|
|
|
| 147 |
)
|
| 148 |
|
| 149 |
huge_llm = ChatOpenAI(
|
| 150 |
+
model="alias-fast",
|
| 151 |
base_url=HELMHOLTZ_BASE_URL,
|
| 152 |
api_key=api_key,
|
| 153 |
max_tokens=1024,
|
| 154 |
max_retries=0
|
| 155 |
)
|
| 156 |
|
| 157 |
+
# Larger-context model (alias-large) for heavyweight extraction work,
# served via the Helmholtz Blablador OpenAI-compatible endpoint.
large_llm = ChatOpenAI(
    model="alias-large",
    base_url=HELMHOLTZ_BASE_URL,
    api_key=api_key,
    max_tokens=2048,
    # Retries are handled externally by ainvoke_with_retry, so disable
    # the client's own retry loop.
    max_retries=0
)
|
| 164 |
+
|
| 165 |
async def ainvoke_with_retry(llm, prompt, max_retries=5, delay=30):
|
| 166 |
"""Wrapper for LLM calls with retry logic to handle intermittent provider errors."""
|
| 167 |
last_error = None
|
|
|
|
| 800 |
}
|
| 801 |
|
| 802 |
extracted = {}
|
| 803 |
+
extraction_llm = large_llm if persona == "mentor" else fast_llm
|
| 804 |
+
|
| 805 |
for key, instruction in fields_config.items():
|
| 806 |
try:
|
| 807 |
prompt = f"{instruction}\n\nLOG:\n{chat_text}\n\nRespond ONLY with the extracted content for this field. Do not add conversational filler."
|
| 808 |
+
logger.info(f"Extracting {key} for {persona}...")
|
| 809 |
+
response = await ainvoke_with_retry(extraction_llm, prompt)
|
| 810 |
content = response.content
|
| 811 |
if "<think>" in content:
|
| 812 |
content = content.split("</think>")[-1].strip()
|
|
|
|
| 1039 |
if not session_id:
|
| 1040 |
return "No session ID."
|
| 1041 |
|
| 1042 |
+
# Wait 180 seconds as requested before retrieving activities
|
| 1043 |
+
logger.info(f"Supervisor waiting 180s before retrieving activities for {session_id}...")
|
| 1044 |
+
await asyncio.sleep(180)
|
| 1045 |
+
|
| 1046 |
tools = await get_all_tools()
|
| 1047 |
|
| 1048 |
# Load context from log_file if provided
|
|
|
|
| 1083 |
"Respond with the EXACT message you want to send to Jules."
|
| 1084 |
)
|
| 1085 |
|
| 1086 |
+
response = await ainvoke_with_retry(fast_llm, [
|
| 1087 |
{"role": "system", "content": system_msg},
|
| 1088 |
{"role": "user", "content": prompt}
|
| 1089 |
])
|
|
|
|
| 1151 |
logger.error(f"Error loading branches: {e}")
|
| 1152 |
return gr.update(choices=["main"], value="main")
|
| 1153 |
|
| 1154 |
+
async def handle_jules_comm(repo_url_input, branch="main", log_file="", hf_prof_fallback="", hf_space_fallback="", hf_token_fallback=""):
|
| 1155 |
+
if not repo_url_input:
|
| 1156 |
return "No repository URL/source provided.", ""
|
| 1157 |
|
| 1158 |
all_tools = await get_all_tools()
|
|
|
|
| 1164 |
]
|
| 1165 |
tools = [t for t in all_tools if t.name in jules_tool_names]
|
| 1166 |
|
| 1167 |
+
# Ensure we have the full HTTPS URL for instructions
|
| 1168 |
+
full_repo_url = repo_url_input
|
| 1169 |
+
if not full_repo_url.startswith("http"):
|
| 1170 |
+
full_repo_url = f"https://github.com/{repo_url_input.replace('sources/github/', '')}"
|
| 1171 |
+
if not full_repo_url.endswith(".git"): full_repo_url += ".git"
|
| 1172 |
+
|
| 1173 |
+
repo_url = transform_github_url(repo_url_input)
|
| 1174 |
|
| 1175 |
hf_profile, hf_space, hf_token = hf_prof_fallback, hf_space_fallback, hf_token_fallback
|
| 1176 |
if log_file:
|
|
|
|
| 1185 |
hf_token = get_hf_token(hf_profile) or hf_token_fallback
|
| 1186 |
|
| 1187 |
base_instruction = (
|
| 1188 |
+
f"important!: You must first clone the repository into your current workspace using its full HTTPS link: {full_repo_url}. "
|
| 1189 |
+
"Investigate the existing codebase to understand its structure and features. "
|
| 1190 |
+
"The current codebase you are working in is to be adapted following the vision of the project transformation in the /jules folder. "
|
| 1191 |
"The current application should be kept and only slightly changed, improved, expanded by the features described in /jules folder. "
|
| 1192 |
"The tasks files give a starting point, but judge for yourself. Develop tests to run to first see the current application feature, how it is working and make a plan to implement the features requested in /jules folder to develop the current application inside the repo towards the vision described to achieve expansion of the current app with functionalities described in the /jules folder. "
|
| 1193 |
"Adapt your coding implemententation to the coding language used by the project, and try to go with that. Test the full app within this working space.\n\n"
|
|
|
|
| 1815 |
async def respond(message, chat_history):
    """Stream planning-persona chat updates, clearing the input box on each step."""
    stream = handle_chat(message, chat_history, persona="planning")
    async for history_so_far in stream:
        # First output resets the textbox to "", second updates the chatbot.
        yield "", history_so_far
|
| 1818 |
+
|
| 1819 |
+
p_click = submit_btn.click(respond, [msg, chatbot], [msg, chatbot])
|
| 1820 |
+
p_submit = msg.submit(respond, [msg, chatbot], [msg, chatbot])
|
| 1821 |
plan_back_btn.click(lambda: (gr.update(visible=True), gr.update(visible=False)), None, [planning_chat_col, planning_structured_col])
|
| 1822 |
|
| 1823 |
with gr.Tab("Push Files") as push_files_tab:
|
|
|
|
| 1909 |
res, sid = await handle_jules_comm(repo, branch, log_file, hf_prof, hf_space, hf_token)
|
| 1910 |
return res, sid
|
| 1911 |
|
| 1912 |
+
comm_btn.click(start_comm_and_monitor, [repo_input, branch_dropdown, log_dropdown_comm, hf_profile_comm, hf_space_comm, hf_token_comm], [comm_output, mon_session_id])
|
|
|
|
| 1913 |
|
| 1914 |
with gr.Tab("Container Logs"):
|
| 1915 |
log_view = gr.Textbox(label="app.log (last 100 lines)", lines=25, interactive=False)
|
|
|
|
| 1932 |
[mentor_chatbot],
|
| 1933 |
[mentor_chat_col, mentor_structured_col, mentor_proj_desc, mentor_tasks_tests, mentor_repos, mentor_expectations, mentor_endpoints, mentor_prof, mentor_space, mentor_token, mentor_status, log_dropdown, log_dropdown_comm]
|
| 1934 |
)
|
| 1935 |
+
|
| 1936 |
+
planning_outputs = [planning_chat_col, planning_structured_col, plan_proj_desc, plan_tasks_tests, plan_repos, plan_expectations, plan_endpoints, plan_prof, plan_space, plan_token, ideate_status, log_dropdown, log_dropdown_comm]
|
| 1937 |
+
|
| 1938 |
close_btn.click(
|
| 1939 |
planning_close_wrap,
|
| 1940 |
[chatbot],
|
| 1941 |
+
planning_outputs
|
| 1942 |
)
|
| 1943 |
|
| 1944 |
+
# Planning session auto-pipeline (trigger close after final answer)
|
| 1945 |
+
p_click.then(planning_close_wrap, [chatbot], planning_outputs)
|
| 1946 |
+
p_submit.then(planning_close_wrap, [chatbot], planning_outputs)
|
| 1947 |
+
|
| 1948 |
# Log Refresh Logic
|
| 1949 |
refresh_logs_btn_1.click(refresh_logs_ui, outputs=[log_dropdown, log_dropdown_comm])
|
| 1950 |
refresh_logs_btn_2.click(refresh_logs_ui, outputs=[log_dropdown, log_dropdown_comm])
|
test_models.py
ADDED
|
@@ -0,0 +1,49 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
import asyncio
import sys
from unittest.mock import MagicMock, patch

# Stub out heavy/UI dependencies before importing the app module so the
# import does not require a running Gradio environment or a git binary.
sys.modules['gradio'] = MagicMock()
sys.modules['git'] = MagicMock()

import app
from langchain_core.messages import AIMessage


async def test_persona_models():
    """Verify handle_close_ideate routes extraction to the right model per persona.

    In app.py: extraction_llm = large_llm if persona == "mentor" else fast_llm
    """
    app.large_llm = MagicMock()
    app.fast_llm = MagicMock()
    app.get_ideation_logs = MagicMock(return_value=[])

    # Record which LLM each extraction call used instead of hitting the network.
    called_llms = []

    async def mock_invoke(llm, prompt, **kwargs):
        called_llms.append(llm)
        return AIMessage(content="Extracted")

    with patch("app.ainvoke_with_retry", side_effect=mock_invoke), \
         patch("os.makedirs"), patch("builtins.open", MagicMock()):

        # Mentor persona must route extraction through the large model.
        print("Testing Mentor extraction (should use large_llm)...")
        await app.handle_close_ideate([{"role": "user", "content": "hi"}], persona="mentor")
        # handle_close_ideate makes 8 extraction calls. Guard against a
        # vacuously-true all() over an empty list: if the mocked pipeline
        # makes zero calls, the original assertion would still pass.
        assert called_llms, "no extraction calls were recorded for mentor persona"
        assert all(l is app.large_llm for l in called_llms[:8])

        called_llms.clear()

        # Planning persona must route extraction through the fast model.
        print("Testing Planning extraction (should use fast_llm)...")
        await app.handle_close_ideate([{"role": "user", "content": "hi"}], persona="planning")
        assert called_llms, "no extraction calls were recorded for planning persona"
        assert all(l is app.fast_llm for l in called_llms[:8])

    print("Persona model selection test PASSED!")


if __name__ == "__main__":
    asyncio.run(test_persona_models())
|