Ashok75 commited on
Commit
38951bc
·
verified ·
1 Parent(s): 1deee4e

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +41 -68
app.py CHANGED
@@ -1,77 +1,50 @@
1
- import torch
2
- import re
3
  from flask import Flask, request, Response, render_template
4
- from transformers import AutoModelForCausalLM, AutoTokenizer
5
- from langgraph.graph import StateGraph, END
6
- from typing import TypedDict
7
 
8
  app = Flask(__name__)
9
 
10
- # 1. Loading the Cognitive Core [5, 6]
11
- model_id = "AshokGakr/model-tiny"
12
- tokenizer = AutoTokenizer.from_pretrained(model_id)
13
- model = AutoModelForCausalLM.from_pretrained(model_id, torch_dtype=torch.float16, device_map="auto")
 
 
 
 
 
 
 
 
 
 
14
 
15
- # 2. Defining State and Tools [7, 8]
16
- class AgentState(TypedDict):
17
- messages: list[dict]
18
- next_action: str
19
-
20
- def get_time(query: str):
21
- from datetime import datetime
22
- return f"Observation: The current time is {datetime.now().strftime('%H:%M:%S')}."
23
-
24
- tools = {"get_time": get_time}
25
-
26
- # 3. The Reasoning Node [9, 10]
27
- def call_model(state: AgentState):
28
- # Context Engineering: Applying chat template for multi-turn coherence [11, 12]
29
- inputs = tokenizer.apply_chat_template(state['messages'], add_generation_prompt=True, return_tensors="pt").to(model.device)
30
-
31
- # Generate and Slice: Correctly targeting the token dimension to avoid empty strings
32
- output_ids = model.generate(**inputs, max_new_tokens=256, do_sample=True, temperature=0.7)
33
- new_tokens = output_ids[0, inputs['input_ids'].shape[-1]:]
34
- response = tokenizer.decode(new_tokens, skip_special_tokens=True)
35
-
36
- # Logic to identify if the agent needs a tool or has a Final Answer [13, 14]
37
- action_match = re.search(r"Action:\s*(\w+)", response)
38
- return {
39
- "messages": state['messages'] + [{"role": "assistant", "content": response}],
40
- "next_action": action_match.group(1) if action_match else "end"
41
- }
42
-
43
- # 4. The Action Node [14, 15]
44
- def execute_tool(state: AgentState):
45
- tool_name = state['next_action']
46
- observation = tools[tool_name]("")
47
- return {"messages": state['messages'] + [{"role": "user", "content": observation}]}
48
-
49
- # 5. Graph Construction [9, 16]
50
- workflow = StateGraph(AgentState)
51
- workflow.add_node("agent", call_model)
52
- workflow.add_node("tools", execute_tool)
53
- workflow.set_entry_point("agent")
54
- workflow.add_conditional_edges("agent", lambda x: "tools" if x["next_action"] in tools else "end", {"tools": "tools", "end": END})
55
- workflow.add_edge("tools", "agent")
56
- agent_executor = workflow.compile()
57
 
58
  @app.route('/chat', methods=['POST'])
59
  def chat():
60
- user_msg = request.json.get("message")
61
- # System Prompt: Establishing identity and rules [17, 18]
62
- inputs = {"messages": [
63
- {"role": "system", "content": "You are a ReAct agent. Use Thought:, Action:, and Final Answer: tags."},
64
- {"role": "user", "content": user_msg}
65
- ]}
66
 
67
- def run():
68
- for output in agent_executor.stream(inputs):
69
- for key, value in output.items():
70
- yield value['messages'][-1]['content'] + "\n"
71
-
72
- return Response(run(), mimetype='text/plain')
73
-
74
- @app.route('/')
75
- def index(): return render_template('index.html')
76
-
77
- if __name__ == '__main__': app.run(host='0.0.0.0', port=7860)
 
 
 
 
 
 
 
 
 
 
 
import json

from flask import Flask, request, Response, render_template
from llama_cpp import Llama

app = Flask(__name__)

# Cognitive core: Nanbeige 4.1 3B quantized to Q5_K_M (GGUF).
# The .gguf file must sit next to this script.
llm = Llama(
    model_path="nanbeige4.1-3b-Q5_K_M.gguf",
    n_ctx=2048,  # context-window (attention budget) in tokens [8]
    n_threads=4,
    verbose=False
)

# Instructs the model to expose chain-of-thought inside <thought> tags
# before the user-visible answer.
SYSTEM_PROMPT = (
    "You are a helpful assistant. Before giving your final answer, "
    "provide your internal reasoning inside <thought> tags. "
    "Format: <thought>Your reasoning here</thought> Final response here."
)
21
 
22
@app.route('/')
def index():
    """Serve the chat front-end page."""
    return render_template('index.html')
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
25
 
26
@app.route('/chat', methods=['POST'])
def chat():
    """Stream a model completion for the posted JSON {"message": ...} as plain text.

    Returns a chunked text/plain Response that yields tokens as the model
    produces them.
    """
    # Default to "" so a missing "message" key doesn't inject the literal
    # string "None" into the prompt.
    user_input = request.json.get("message", "")

    # Constructing the context window [9]
    prompt = f"System: {SYSTEM_PROMPT}\nUser: {user_input}\nAssistant:"

    def generate():
        # Streaming inference [10]
        stream = llm(
            prompt,
            max_tokens=512,
            stream=True,
            temperature=0.7,
            stop=["User:", "System:"]
        )
        for chunk in stream:
            # llama-cpp-python streaming chunks look like
            # {'choices': [{'text': ...}, ...]} — 'choices' is a LIST.
            # The original chunk['choices']['text'] indexed the list with a
            # string key and raised TypeError on the very first chunk.
            text = chunk['choices'][0]['text']
            if text:
                yield text

    return Response(generate(), mimetype='text/plain')
48
+
49
if __name__ == '__main__':
    # Listen on every interface; 7860 is the conventional HF Spaces port.
    app.run(host='0.0.0.0', port=7860)