Ashok75 commited on
Commit
38951bc
·
verified ·
1 Parent(s): 1deee4e

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +41 -68
app.py CHANGED
@@ -1,77 +1,50 @@
1
- import torch
2
- import re
3
  from flask import Flask, request, Response, render_template
4
- from transformers import AutoModelForCausalLM, AutoTokenizer
5
- from langgraph.graph import StateGraph, END
6
- from typing import TypedDict
7
 
8
  app = Flask(__name__)
9
 
10
- # 1. Loading the Cognitive Core [5, 6]
11
- model_id = "AshokGakr/model-tiny"
12
- tokenizer = AutoTokenizer.from_pretrained(model_id)
13
- model = AutoModelForCausalLM.from_pretrained(model_id, torch_dtype=torch.float16, device_map="auto")
 
 
 
 
 
 
 
 
 
 
14
 
15
- # 2. Defining State and Tools [7, 8]
16
- class AgentState(TypedDict):
17
- messages: list[dict]
18
- next_action: str
19
-
20
- def get_time(query: str):
21
- from datetime import datetime
22
- return f"Observation: The current time is {datetime.now().strftime('%H:%M:%S')}."
23
-
24
- tools = {"get_time": get_time}
25
-
26
- # 3. The Reasoning Node [9, 10]
27
- def call_model(state: AgentState):
28
- # Context Engineering: Applying chat template for multi-turn coherence [11, 12]
29
- inputs = tokenizer.apply_chat_template(state['messages'], add_generation_prompt=True, return_tensors="pt").to(model.device)
30
-
31
- # Generate and Slice: Correctly targeting the token dimension to avoid empty strings
32
- output_ids = model.generate(**inputs, max_new_tokens=256, do_sample=True, temperature=0.7)
33
- new_tokens = output_ids[0, inputs['input_ids'].shape[-1]:]
34
- response = tokenizer.decode(new_tokens, skip_special_tokens=True)
35
-
36
- # Logic to identify if the agent needs a tool or has a Final Answer [13, 14]
37
- action_match = re.search(r"Action:\s*(\w+)", response)
38
- return {
39
- "messages": state['messages'] + [{"role": "assistant", "content": response}],
40
- "next_action": action_match.group(1) if action_match else "end"
41
- }
42
-
43
- # 4. The Action Node [14, 15]
44
- def execute_tool(state: AgentState):
45
- tool_name = state['next_action']
46
- observation = tools[tool_name]("")
47
- return {"messages": state['messages'] + [{"role": "user", "content": observation}]}
48
-
49
- # 5. Graph Construction [9, 16]
50
- workflow = StateGraph(AgentState)
51
- workflow.add_node("agent", call_model)
52
- workflow.add_node("tools", execute_tool)
53
- workflow.set_entry_point("agent")
54
- workflow.add_conditional_edges("agent", lambda x: "tools" if x["next_action"] in tools else "end", {"tools": "tools", "end": END})
55
- workflow.add_edge("tools", "agent")
56
- agent_executor = workflow.compile()
57
 
58
  @app.route('/chat', methods=['POST'])
59
  def chat():
60
- user_msg = request.json.get("message")
61
- # System Prompt: Establishing identity and rules [17, 18]
62
- inputs = {"messages": [
63
- {"role": "system", "content": "You are a ReAct agent. Use Thought:, Action:, and Final Answer: tags."},
64
- {"role": "user", "content": user_msg}
65
- ]}
66
 
67
- def run():
68
- for output in agent_executor.stream(inputs):
69
- for key, value in output.items():
70
- yield value['messages'][-1]['content'] + "\n"
71
-
72
- return Response(run(), mimetype='text/plain')
73
-
74
- @app.route('/')
75
- def index(): return render_template('index.html')
76
-
77
- if __name__ == '__main__': app.run(host='0.0.0.0', port=7860)
 
 
 
 
 
 
 
 
 
 
 
import json

from flask import Flask, request, Response, render_template
from llama_cpp import Llama

app = Flask(__name__)

# Cognitive core: Nanbeige 4.1 3B quantized to Q5_K_M (GGUF).
# The .gguf file must sit next to this script.
llm = Llama(
    model_path="nanbeige4.1-3b-Q5_K_M.gguf",
    n_ctx=2048,  # context-window (attention budget) in tokens [8]
    n_threads=4,
    verbose=False
)

# Instructs the model to expose chain-of-thought inside <thought> tags
# before the user-visible answer.
SYSTEM_PROMPT = (
    "You are a helpful assistant. Before giving your final answer, "
    "provide your internal reasoning inside <thought> tags. "
    "Format: <thought>Your reasoning here</thought> Final response here."
)
21
 
22
@app.route('/')
def index():
    """Serve the chat front-end page."""
    return render_template('index.html')
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
25
 
26
@app.route('/chat', methods=['POST'])
def chat():
    """Stream a model completion for the posted JSON {"message": ...} as plain text.

    Returns a chunked text/plain Response that yields tokens as the model
    produces them.
    """
    # Default to "" so a missing "message" key doesn't inject the literal
    # string "None" into the prompt.
    user_input = request.json.get("message", "")

    # Constructing the context window [9]
    prompt = f"System: {SYSTEM_PROMPT}\nUser: {user_input}\nAssistant:"

    def generate():
        # Streaming inference [10]
        stream = llm(
            prompt,
            max_tokens=512,
            stream=True,
            temperature=0.7,
            stop=["User:", "System:"]
        )
        for chunk in stream:
            # llama-cpp-python streaming chunks look like
            # {'choices': [{'text': ...}, ...]} — 'choices' is a LIST.
            # The original chunk['choices']['text'] indexed the list with a
            # string key and raised TypeError on the very first chunk.
            text = chunk['choices'][0]['text']
            if text:
                yield text

    return Response(generate(), mimetype='text/plain')
48
+
49
if __name__ == '__main__':
    # Listen on every interface; 7860 is the conventional HF Spaces port.
    app.run(host='0.0.0.0', port=7860)