Spaces:

FlameF0X
/

DeepThoughTree

Running on Zero

File size: 7,241 Bytes

64dfa83
 
 
a8b2c5c
64dfa83
 
a8b2c5c
a7e01dd
a8b2c5c
 
 
 
 
64dfa83
49dd498
9f61929
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
a8b2c5c
64dfa83
 
1b23046
02ee37a
64dfa83
02ee37a
1b23046
 
 
 
 
 
 
64dfa83
1b23046
64dfa83
 
1b23046
64dfa83
1b23046
 
 
64dfa83
 
 
9f61929
 
 
 
 
 
 
 
 
 
64dfa83
9f61929
 
49dd498
9f61929
 
 
64dfa83
 
 
9f61929
1b23046
a8b2c5c
02ee37a
9f61929
a8b2c5c
9f61929
 
 
 
 
 
 
 
 
 
 
 
64dfa83
49dd498
9f61929
1b23046
64dfa83
1b23046
a8b2c5c
 
64dfa83
a8b2c5c
 
 
 
64dfa83
1b23046
 
 
 
 
 
 
64dfa83
 
 
 
 
 
a8b2c5c
1b23046
64dfa83
1b23046
 
 
 
64dfa83
1b23046
 
02ee37a
 
1b23046
02ee37a
1b23046
02ee37a
 
1b23046
 
02ee37a
64dfa83
 
 
9f61929
 
 
 
 
 
 
 
 
 
 
49dd498
9f61929
64dfa83
02ee37a
 
1b23046
02ee37a
64dfa83
 
 
 
 
a8b2c5c
 
 
 
 
02ee37a
de1de04
64dfa83
 
a8b2c5c
64dfa83

import gradio as gr
import spaces
import torch
import re
from transformers import pipeline

# Hugging Face Hub identifier of the LiquidAI "Thinking" checkpoint used for every
# generation call in this app.
MODEL_ID = "LiquidAI/LFM2.5-1.2B-Thinking"

# Shared text-generation pipeline, created once when the module is imported and
# asked to use bfloat16 as its dtype.
generator = pipeline("text-generation", model=MODEL_ID, dtype=torch.bfloat16)

def generate_with_chat_template(messages, max_new_tokens=1536, do_sample=True, temperature=0.7, num_return_sequences=1):
    """
    Render chat `messages` with the tokenizer's chat template and generate completions.

    The messages are turned into a single prompt string (with the generation
    prompt appended) and handed to the module-level `generator` pipeline.

    Args:
        messages: List of {"role": ..., "content": ...} dicts for the chat template.
        max_new_tokens: Upper bound on newly generated tokens per completion.
        do_sample: Whether to sample; when False no temperature is forwarded.
        temperature: Sampling temperature, only used when `do_sample` is True.
        num_return_sequences: How many completions to request for the prompt.

    Returns:
        A list of whitespace-stripped completion strings, one per returned
        sequence, with the echoed prompt removed when it is present.
    """
    rendered_prompt = generator.tokenizer.apply_chat_template(
        messages, tokenize=False, add_generation_prompt=True
    )

    generation_kwargs = {
        "max_new_tokens": max_new_tokens,
        # Explicitly passed as None, presumably so `max_new_tokens` is the only
        # length limit in effect - TODO confirm against the pipeline defaults.
        "max_length": None,
        "generation_config": None,
        "num_return_sequences": num_return_sequences,
        "do_sample": do_sample,
        # Temperature is only meaningful when sampling.
        "temperature": temperature if do_sample else None,
    }
    raw_outputs = generator(rendered_prompt, **generation_kwargs)

    prompt_length = len(rendered_prompt)
    # Keep only the text produced after the prompt; if the prompt is not echoed
    # verbatim at the start, the text is returned as-is.
    return [
        (full_text[prompt_length:] if full_text.startswith(rendered_prompt) else full_text).strip()
        for full_text in (item['generated_text'] for item in raw_outputs)
    ]

# One complete <think>...</think> reasoning block (may span several lines).
_THINK_BLOCK_RE = re.compile(r"<think>.*?</think>", re.DOTALL)
# First standalone verdict word in an evaluator reply ("goodbye"/"badly" do not match).
_VERDICT_RE = re.compile(r"\b(good|maybe|bad)\b")
# Score assigned to each verdict; anything unrecognised scores 0.0 as well.
_VERDICT_SCORES = {"good": 1.0, "maybe": 0.5, "bad": 0.0}
# Number of best-rated paths carried over to the next depth.
_BEAM_WIDTH = 2
# Maximum number of characters of a thought shown in the console trace.
_LOG_PREVIEW_CHARS = 120


def _strip_thinking(text: str) -> str:
    """Return `text` with model reasoning removed, tolerating malformed think tags."""
    visible = _THINK_BLOCK_RE.sub("", text)
    # NOTE(review): if the chat template opens the reasoning block inside the
    # prompt, the completion would carry only the closing tag - keep what follows it.
    if "</think>" in visible:
        visible = visible.rpartition("</think>")[2]
    # An opening tag without a closing one means the reasoning was cut off
    # (e.g. by the token limit); everything after it is unfinished reasoning.
    if "<think>" in visible:
        visible = visible.partition("<think>")[0]
    return visible.strip()


def _score_verdict(eval_text: str) -> float:
    """Map an evaluator reply to a score using its first standalone verdict word."""
    match = _VERDICT_RE.search(eval_text.lower())
    return _VERDICT_SCORES[match.group(1)] if match else 0.0


def _preview(text: str, limit: int = _LOG_PREVIEW_CHARS) -> str:
    """Shorten `text` for logging, adding an ellipsis only when it was truncated."""
    return text if len(text) <= limit else text[:limit] + "..."


# NOTE(review): the search issues up to 1 + 2 * (max_depth - 1) generation calls,
# `branches` evaluation calls per generation call, plus one synthesis call, each
# allowed up to 1536 new tokens - confirm the 60s budget covers the largest settings.
@spaces.GPU(duration=60)  # 60s is plenty because LiquidAI is blindingly fast
def tot_search(problem: str, branches: int = 3, max_depth: int = 3) -> str:
    """
    Solve a problem with a Tree of Thoughts (ToT) search and return only the final answer.

    At every depth each surviving reasoning path is expanded into `branches`
    candidate next steps. The model then rates each candidate as 'Good',
    'Maybe' or 'Bad'; candidates that do not score above zero are discarded and
    the two best-rated paths are kept. The best surviving path is finally
    condensed into a concise answer. A trace of the search is printed to the
    server console and is not returned to the caller.

    Args:
        problem: The problem statement to solve.
        branches: Number of candidate next steps sampled per surviving path.
        max_depth: Maximum number of reasoning steps (tree depth) to explore.

    Returns:
        The synthesized final answer with reasoning blocks removed, or a
        message starting with "Error:" when no problem was given, every path
        was rejected, or the model produced no visible answer.
    """
    if not problem or not problem.strip():
        return "Error: No problem statement was provided."

    # Callers outside the UI (API/MCP clients) may presumably send numbers such
    # as 3.0; range() and num_return_sequences need real integers.
    branches = max(1, int(branches))
    max_depth = int(max_depth)

    # Track statistics for server-side logs
    stats = {
        "nodes_generated": 0,
        "nodes_evaluated": 0,
        "nodes_pruned": 0,
        "actual_depth_reached": 0
    }

    current_paths = [{"history": [problem], "score": 1.0}]
    tree_history_log = []

    for depth in range(max_depth):
        stats["actual_depth_reached"] = depth + 1
        new_paths = []
        depth_log = []

        for p_idx, path in enumerate(current_paths):
            latest_thought = path["history"][-1]

            # 1. GENERATE BRANCHES
            messages = [
                {
                    "role": "system",
                    "content": "You are a helpful reasoning assistant. Provide a single, distinct, and logical next step to solve the user's problem. Be extremely concise, direct, and focused."
                },
                {
                    "role": "user",
                    "content": f"Problem: {problem}\nCurrent progress: {latest_thought}\nWhat is the single next logical step?"
                }
            ]

            outputs = generate_with_chat_template(
                messages,
                max_new_tokens=1536,
                do_sample=True,
                temperature=0.7,
                num_return_sequences=branches
            )

            # 2. EVALUATE/SCORE BRANCHES
            for out_text in outputs:
                stats["nodes_generated"] += 1

                # Keep only the visible step; fall back to the raw output when
                # nothing is left after removing the reasoning.
                next_step = _strip_thinking(out_text) or out_text

                eval_messages = [
                    {
                        "role": "system",
                        "content": "You are an evaluator. Your task is to rate whether a proposed next step is helpful for solving the given problem. You must respond with exactly one of these words: 'Good', 'Maybe', or 'Bad'. Do not explain your choice."
                    },
                    {
                        "role": "user",
                        "content": f"Problem: {problem}\nProposed Next Step: {next_step}\nIs this step 'Good', 'Maybe', or 'Bad'?"
                    }
                ]

                eval_outs = generate_with_chat_template(eval_messages, max_new_tokens=1536, do_sample=False)
                stats["nodes_evaluated"] += 1

                # Score only the visible verdict, never words that merely occur
                # inside the evaluator's reasoning.
                eval_text = _strip_thinking(eval_outs[0]).lower()
                score = _score_verdict(eval_text)

                depth_log.append({
                    "parent_node": p_idx,
                    "thought": _preview(next_step),
                    "evaluation": eval_text,
                    "score": score
                })

                if score > 0:
                    new_paths.append({
                        "history": path["history"] + [next_step],
                        "score": score
                    })

        # 3. PRUNE (keep the best-rated paths; the sort is stable, so ties keep
        # their generation order)
        original_count = len(new_paths)
        current_paths = sorted(new_paths, key=lambda candidate: candidate["score"], reverse=True)[:_BEAM_WIDTH]
        stats["nodes_pruned"] += (original_count - len(current_paths))

        tree_history_log.append((depth + 1, depth_log))

        if not current_paths:
            break

    # Print execution trace to the background Hugging Face Space console logs
    print("\n--- Tree of Thoughts Execution Logs ---")
    for level, entries in tree_history_log:
        print(f"Depth {level}:")
        for entry in entries:
            print(f"  - [{entry['evaluation'].upper()}] Thought: {entry['thought']}")
    print(
        f"Stats: Depth={stats['actual_depth_reached']}, Generated={stats['nodes_generated']}, "
        f"Evaluated={stats['nodes_evaluated']}, Pruned={stats['nodes_pruned']}\n"
    )

    if not current_paths:
        return "Error: All reasoning paths hit a dead end."

    # 4. SYNTHESIZE THE WINNING PATH
    best_chain = " -> ".join(current_paths[0]["history"])
    final_messages = [
        {
            "role": "system",
            "content": "You are a helpful assistant. Synthesize the final, concise answer to the problem based on the provided reasoning path. Do not include any meta-reasoning, instruction-following placeholders, or thinking tags. Provide only the clean, final direct answer."
        },
        {
            "role": "user",
            "content": f"Problem: {problem}\nReasoning path: {best_chain}\nWhat is the final concise answer?"
        }
    ]

    final_outs = generate_with_chat_template(final_messages, max_new_tokens=1536, do_sample=False)

    # Strip everything down to get just the clean answer
    clean_answer = _strip_thinking(final_outs[0])
    if not clean_answer:
        return "Error: The model did not produce a final answer."

    return clean_answer


# Gradio front end: a free-text problem box plus two sliders (2-4, step 1,
# default 3) whose values are passed to tot_search in this order.
demo = gr.Interface(
    title="DeepThoughTree --- Tree of Thoughts (ToT) Orchestrator",
    fn=tot_search,
    inputs=[
        gr.Textbox(label="Problem"),
        gr.Slider(minimum=2, maximum=4, value=3, step=1, label="Branches"),
        gr.Slider(minimum=2, maximum=4, value=3, step=1, label="Max Depth"),
    ],
    outputs="text",
)

# Start the app with the MCP server option switched on.
demo.launch(mcp_server=True)