Ashok75 commited on
Commit
148ca6e
·
verified ·
1 Parent(s): 434ea9b

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +81 -39
app.py CHANGED
@@ -1,19 +1,52 @@
1
  import torch
 
 
 
2
  from flask import Flask, request, Response, render_template
3
  from transformers import AutoModelForCausalLM, AutoTokenizer, TextIteratorStreamer
4
  from threading import Thread
5
 
6
  app = Flask(__name__)
7
 
8
- # 1. Load the Model and Tokenizer
9
- # The sources highlight that the LLM serves as the cognitive core or "brain" [5, 6].
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
10
  model_id = "AshokGakr/model-tiny"
11
  tokenizer = AutoTokenizer.from_pretrained(model_id)
12
- model = AutoModelForCausalLM.from_pretrained(
13
- model_id,
14
- torch_dtype=torch.float16 if torch.cuda.is_available() else torch.float32,
15
- device_map="auto"
16
- )
 
 
 
 
 
 
 
 
 
 
17
 
18
  @app.route('/')
19
  def index():
@@ -22,37 +55,46 @@ def index():
22
  @app.route('/chat', methods=['POST'])
23
  def chat():
24
  data = request.json
25
- user_messages = data.get("messages", [])
26
-
27
- # 2. Apply Chat Template
28
- # Using the specific Jinja template from the model repo to format the prompt.
29
- # This ensures the model follows the intended "Thought-Action-Observation" cycle [7].
30
- input_ids = tokenizer.apply_chat_template(
31
- user_messages,
32
- add_generation_prompt=True,
33
- return_tensors="pt"
34
- ).to(model.device)
35
-
36
- # 3. Setup Streaming
37
- # Context engineering involves curating the optimal set of tokens for inference [8].
38
- streamer = TextIteratorStreamer(tokenizer, skip_prompt=True, skip_special_tokens=True)
39
- generation_kwargs = dict(
40
- input_ids=input_ids,
41
- streamer=streamer,
42
- max_new_tokens=512,
43
- do_sample=True,
44
- temperature=0.7
45
- )
46
-
47
- # Run generation in a separate thread to allow the Flask response to stream
48
- thread = Thread(target=model.generate, kwargs=generation_kwargs)
49
- thread.start()
50
-
51
- def generate():
52
- for new_text in streamer:
53
- yield new_text
54
-
55
- return Response(generate(), mimetype='text/plain')
 
 
 
 
 
 
 
 
 
56
 
57
  if __name__ == '__main__':
58
- app.run(host='0.0.0.0', port=7860) # Standard HF Space port
 
1
  import torch
2
+ import json
3
+ import re
4
+ import datetime
5
  from flask import Flask, request, Response, render_template
6
  from transformers import AutoModelForCausalLM, AutoTokenizer, TextIteratorStreamer
7
  from threading import Thread
8
 
9
  app = Flask(__name__)
10
 
11
+ # 1. TOOL DEFINITIONS
12
def get_current_datetime(query: str = ""):
    """Return an agent-readable observation with the current local date and time.

    The *query* parameter is ignored; it only exists so that every tool in the
    registry shares the same one-string-argument calling convention.
    """
    stamp = datetime.datetime.now().strftime('%Y-%m-%d %H:%M:%S')
    return f"Observation: The current date and time is {stamp}."
15
+
16
def simple_calculator(expression: str):
    """An easy-to-construct tool for basic math (add, sub, mult, div).

    The expression comes from model output, i.e. untrusted input, so it is
    parsed with `ast` and only whitelisted arithmetic nodes are evaluated.
    This replaces the previous `eval()` call: even with `__builtins__`
    stripped, `eval` can still run arbitrary (and DoS-capable) expressions.

    Returns an "Observation: ..." string carrying either the numeric result
    or the error message, matching the ReAct observation format the agent
    loop expects.
    """
    import ast
    import operator

    # Whitelist of arithmetic operations the agent is allowed to request.
    ops = {
        ast.Add: operator.add,
        ast.Sub: operator.sub,
        ast.Mult: operator.mul,
        ast.Div: operator.truediv,
        ast.FloorDiv: operator.floordiv,
        ast.Mod: operator.mod,
        ast.Pow: operator.pow,
        ast.USub: operator.neg,
        ast.UAdd: operator.pos,
    }

    def _eval(node):
        # Recursively evaluate only numeric literals, binary and unary
        # arithmetic; anything else (names, calls, subscripts...) is rejected.
        if isinstance(node, ast.Expression):
            return _eval(node.body)
        if isinstance(node, ast.Constant) and isinstance(node.value, (int, float)):
            return node.value
        if isinstance(node, ast.BinOp) and type(node.op) in ops:
            return ops[type(node.op)](_eval(node.left), _eval(node.right))
        if isinstance(node, ast.UnaryOp) and type(node.op) in ops:
            return ops[type(node.op)](_eval(node.operand))
        raise ValueError("unsupported expression element")

    try:
        result = _eval(ast.parse(expression, mode="eval"))
        return f"Observation: The calculation result is {result}."
    except Exception as e:
        # Errors are reported back to the agent as an observation rather than
        # raised, so a bad expression cannot crash the request.
        return f"Observation: Error in calculation: {str(e)}."
25
+
26
# Tool Registry: maps the tool name the model emits after "Action:" to the
# callable that implements it. Keys must match the names advertised in
# SYSTEM_PROMPT exactly.
tools = dict(
    get_current_datetime=get_current_datetime,
    simple_calculator=simple_calculator,
)
31
+
32
# Load Model
# Loading happens once at import time so every request shares the same weights.
model_id = "AshokGakr/model-tiny"
tokenizer = AutoTokenizer.from_pretrained(model_id)
# Use half precision only when a GPU is available: float16 matmuls are
# unsupported-or-slow (and NaN-prone) on CPU, so fall back to float32 there.
# This restores the guard the previous revision of this file had.
model = AutoModelForCausalLM.from_pretrained(
    model_id,
    torch_dtype=torch.float16 if torch.cuda.is_available() else torch.float32,
    device_map="auto",
)
36
+
37
# ReAct-style system prompt. It instructs the model to emit
# Thought / Action / Action Input lines that the /chat loop parses with
# regexes, and to terminate with a "Final Answer:" line. The tool names
# listed here must match the keys of the `tools` registry exactly.
# NOTE: this text is sent to the model verbatim — do not reformat it.
SYSTEM_PROMPT = """
ROLE: You are a ReAct Agent. You solve tasks using this loop:
Thought: (Reasoning about what to do)
Action: (Tool name: 'get_current_datetime' or 'simple_calculator')
Action Input: (Parameter for the tool)
Observation: (Result from the tool - provided to you)
... (Repeat Thought/Action/Observation if needed)
Final Answer: (The final response to the user)

AVAILABLE TOOLS:
- get_current_datetime: Use this for any questions about the current date or time. No input needed.
- simple_calculator: Use this for any math calculations. Input should be a math expression (e.g., '10 + 5').
"""
50
 
51
  @app.route('/')
52
  def index():
 
55
@app.route('/chat', methods=['POST'])
def chat():
    """POST /chat — run the ReAct agent loop and stream plain text back.

    Expects a JSON body with a "message" key holding the user's query
    (NOTE(review): the previous revision read a "messages" list — confirm
    the front-end was updated to send "message"). Streams the model's raw
    Thought/Action text plus tool observations as chunked text/plain.
    """
    data = request.json
    user_query = data.get("message", "")

    def generate_agent_response():
        # Generator yielded to Flask's Response for chunked streaming.
        # Source 13: Episodic memory maintains the conversation trajectory.
        # History starts fresh per request: system prompt + the single user query.
        history = [{"role": "system", "content": SYSTEM_PROMPT}, {"role": "user", "content": user_query}]

        for i in range(5): # Limit iterations to prevent infinite loops [5]
            # Re-tokenize the full trajectory every turn so observations are visible.
            input_ids = tokenizer.apply_chat_template(history, add_generation_prompt=True, return_tensors="pt").to(model.device)
            streamer = TextIteratorStreamer(tokenizer, skip_prompt=True, skip_special_tokens=True)

            # generate() runs in a worker thread so we can consume the streamer here.
            # NOTE(review): the thread is never join()ed; the streamer iterator
            # ending is relied on to signal completion — confirm no leak on errors.
            thread = Thread(target=model.generate, kwargs={"input_ids": input_ids, "streamer": streamer, "max_new_tokens": 256})
            thread.start()

            # Accumulate the turn's text while forwarding each chunk to the client.
            full_turn_output = ""
            for new_text in streamer:
                full_turn_output += new_text
                yield new_text # Stream thoughts to the UI [6]

            # Check for Action [7]
            # Parse the first "Action:"/"Action Input:" pair from the turn text.
            # NOTE(review): if the model emits both an Action and a Final Answer
            # in one turn, the Action branch wins and the tool still runs.
            action_match = re.search(r"Action:\s*(\w+)", full_turn_output)
            input_match = re.search(r"Action Input:\s*(.*)", full_turn_output)

            if action_match and input_match:
                tool_name = action_match.group(1).strip()
                tool_input = input_match.group(1).strip()

                if tool_name in tools:
                    # Execute the tool and stream its observation to the client too.
                    obs = tools[tool_name](tool_input)
                    yield f"\n{obs}\n"
                    # Feed observation back into history [8, 9]
                    # NOTE(review): the observation is appended with role "user"
                    # (a common workaround for templates lacking a tool role) —
                    # confirm this matches the model's chat template.
                    history.append({"role": "assistant", "content": full_turn_output})
                    history.append({"role": "user", "content": obs})
                else:
                    # Unknown tool name: stop rather than loop on bad actions.
                    break
            elif "Final Answer:" in full_turn_output:
                # Model signalled completion; its text was already streamed above.
                break
            else:
                # No action and no final answer — nothing actionable, stop.
                break

    return Response(generate_agent_response(), mimetype='text/plain')
98
 
99
if __name__ == '__main__':
    # 0.0.0.0:7860 is the standard host/port for a Hugging Face Space.
    app.run(host='0.0.0.0', port=7860)