Spaces:
Running
Running
Update app.py
Browse files
app.py
CHANGED
|
@@ -597,7 +597,7 @@ class SearchRouter:
|
|
| 597 |
# Initialize the router globally
|
| 598 |
search_router = SearchRouter()
|
| 599 |
|
| 600 |
-
def build_smart_prompt(conversation_history: List[Dict[str, str]], context: str = "", original_prompt: str = "") -> str:
|
| 601 |
"""
|
| 602 |
Builds an intelligent prompt that defines the 'ToolBoxesAI Assistant' persona
|
| 603 |
and enforces strict adherence to provided context (Web/RAG) to prevent hallucinations.
|
|
@@ -652,7 +652,7 @@ def build_smart_prompt(conversation_history: List[Dict[str, str]], context: str
|
|
| 652 |
final_messages,
|
| 653 |
tokenize=False,
|
| 654 |
add_generation_prompt=True,
|
| 655 |
-
enable_thinking=
|
| 656 |
)
|
| 657 |
|
| 658 |
return prompt_str
|
|
@@ -804,6 +804,7 @@ class PromptRequest(BaseModel):
|
|
| 804 |
temperature: float = 0.7
|
| 805 |
enable_code_execution: bool = True
|
| 806 |
enable_web_search: bool = True
|
|
|
|
| 807 |
|
| 808 |
@app.get("/")
|
| 809 |
async def root():
|
|
@@ -865,7 +866,7 @@ async def chat_with_model_async(request: PromptRequest):
|
|
| 865 |
logger.info(f"🛠 Tool selected: {tool_result['tool_name'] or 'None'}")
|
| 866 |
|
| 867 |
# Step 3: Build prompt and prepare streaming response
|
| 868 |
-
prompt_str = build_smart_prompt(conversation_history, context, request.prompt)
|
| 869 |
|
| 870 |
# Model generation (still needs to run in thread due to PyTorch limitations)
|
| 871 |
inputs = tokenizer(prompt_str, return_tensors="pt").to(model.device)
|
|
|
|
| 597 |
# Initialize the router globally
|
| 598 |
search_router = SearchRouter()
|
| 599 |
|
| 600 |
+
def build_smart_prompt(conversation_history: List[Dict[str, str]], context: str = "", original_prompt: str = "", enable_thinking: bool = False) -> str:
|
| 601 |
"""
|
| 602 |
Builds an intelligent prompt that defines the 'ToolBoxesAI Assistant' persona
|
| 603 |
and enforces strict adherence to provided context (Web/RAG) to prevent hallucinations.
|
|
|
|
| 652 |
final_messages,
|
| 653 |
tokenize=False,
|
| 654 |
add_generation_prompt=True,
|
| 655 |
+
enable_thinking=enable_thinking
|
| 656 |
)
|
| 657 |
|
| 658 |
return prompt_str
|
|
|
|
| 804 |
temperature: float = 0.7
|
| 805 |
enable_code_execution: bool = True
|
| 806 |
enable_web_search: bool = True
|
| 807 |
+
enable_thinking: bool
|
| 808 |
|
| 809 |
@app.get("/")
|
| 810 |
async def root():
|
|
|
|
| 866 |
logger.info(f"🛠 Tool selected: {tool_result['tool_name'] or 'None'}")
|
| 867 |
|
| 868 |
# Step 3: Build prompt and prepare streaming response
|
| 869 |
+
prompt_str = build_smart_prompt(conversation_history, context, request.prompt, enable_thinking=request.enable_thinking)
|
| 870 |
|
| 871 |
# Model generation (still needs to run in thread due to PyTorch limitations)
|
| 872 |
inputs = tokenizer(prompt_str, return_tensors="pt").to(model.device)
|