Ashok75 commited on
Commit
148ca6e
·
verified ·
1 Parent(s): 434ea9b

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +81 -39
app.py CHANGED
@@ -1,19 +1,52 @@
1
  import torch
 
 
 
2
  from flask import Flask, request, Response, render_template
3
  from transformers import AutoModelForCausalLM, AutoTokenizer, TextIteratorStreamer
4
  from threading import Thread
5
 
6
  app = Flask(__name__)
7
 
8
- # 1. Load the Model and Tokenizer
9
- # The sources highlight that the LLM serves as the cognitive core or "brain" [5, 6].
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
10
  model_id = "AshokGakr/model-tiny"
11
  tokenizer = AutoTokenizer.from_pretrained(model_id)
12
- model = AutoModelForCausalLM.from_pretrained(
13
- model_id,
14
- torch_dtype=torch.float16 if torch.cuda.is_available() else torch.float32,
15
- device_map="auto"
16
- )
 
 
 
 
 
 
 
 
 
 
17
 
18
  @app.route('/')
19
  def index():
@@ -22,37 +55,46 @@ def index():
22
  @app.route('/chat', methods=['POST'])
23
  def chat():
24
  data = request.json
25
- user_messages = data.get("messages", [])
26
-
27
- # 2. Apply Chat Template
28
- # Using the specific Jinja template from the model repo to format the prompt.
29
- # This ensures the model follows the intended "Thought-Action-Observation" cycle [7].
30
- input_ids = tokenizer.apply_chat_template(
31
- user_messages,
32
- add_generation_prompt=True,
33
- return_tensors="pt"
34
- ).to(model.device)
35
-
36
- # 3. Setup Streaming
37
- # Context engineering involves curating the optimal set of tokens for inference [8].
38
- streamer = TextIteratorStreamer(tokenizer, skip_prompt=True, skip_special_tokens=True)
39
- generation_kwargs = dict(
40
- input_ids=input_ids,
41
- streamer=streamer,
42
- max_new_tokens=512,
43
- do_sample=True,
44
- temperature=0.7
45
- )
46
-
47
- # Run generation in a separate thread to allow the Flask response to stream
48
- thread = Thread(target=model.generate, kwargs=generation_kwargs)
49
- thread.start()
50
-
51
- def generate():
52
- for new_text in streamer:
53
- yield new_text
54
-
55
- return Response(generate(), mimetype='text/plain')
 
 
 
 
 
 
 
 
 
56
 
57
  if __name__ == '__main__':
58
- app.run(host='0.0.0.0', port=7860) # Standard HF Space port
 
1
  import torch
2
+ import json
3
+ import re
4
+ import datetime
5
  from flask import Flask, request, Response, render_template
6
  from transformers import AutoModelForCausalLM, AutoTokenizer, TextIteratorStreamer
7
  from threading import Thread
8
 
9
  app = Flask(__name__)
10
 
11
+ # 1. TOOL DEFINITIONS
12
def get_current_datetime(query: str = ""):
    """Return an agent-readable observation with the current local date and time.

    The *query* parameter is ignored; it only exists so that every tool in the
    registry shares the same one-string-argument calling convention.
    """
    stamp = datetime.datetime.now().strftime('%Y-%m-%d %H:%M:%S')
    return f"Observation: The current date and time is {stamp}."
15
+
16
def simple_calculator(expression: str):
    """An easy-to-construct tool for basic math (add, sub, mult, div).

    The expression comes from model output, i.e. untrusted input, so it is
    parsed with `ast` and only whitelisted arithmetic nodes are evaluated.
    This replaces the previous `eval()` call: even with `__builtins__`
    stripped, `eval` can still run arbitrary (and DoS-capable) expressions.

    Returns an "Observation: ..." string carrying either the numeric result
    or the error message, matching the ReAct observation format the agent
    loop expects.
    """
    import ast
    import operator

    # Whitelist of arithmetic operations the agent is allowed to request.
    ops = {
        ast.Add: operator.add,
        ast.Sub: operator.sub,
        ast.Mult: operator.mul,
        ast.Div: operator.truediv,
        ast.FloorDiv: operator.floordiv,
        ast.Mod: operator.mod,
        ast.Pow: operator.pow,
        ast.USub: operator.neg,
        ast.UAdd: operator.pos,
    }

    def _eval(node):
        # Recursively evaluate only numeric literals, binary and unary
        # arithmetic; anything else (names, calls, subscripts...) is rejected.
        if isinstance(node, ast.Expression):
            return _eval(node.body)
        if isinstance(node, ast.Constant) and isinstance(node.value, (int, float)):
            return node.value
        if isinstance(node, ast.BinOp) and type(node.op) in ops:
            return ops[type(node.op)](_eval(node.left), _eval(node.right))
        if isinstance(node, ast.UnaryOp) and type(node.op) in ops:
            return ops[type(node.op)](_eval(node.operand))
        raise ValueError("unsupported expression element")

    try:
        result = _eval(ast.parse(expression, mode="eval"))
        return f"Observation: The calculation result is {result}."
    except Exception as e:
        # Errors are reported back to the agent as an observation rather than
        # raised, so a bad expression cannot crash the request.
        return f"Observation: Error in calculation: {str(e)}."
25
+
26
# Tool Registry: maps the tool name the model emits after "Action:" to the
# callable that implements it. Keys must match the names advertised in
# SYSTEM_PROMPT exactly.
tools = dict(
    get_current_datetime=get_current_datetime,
    simple_calculator=simple_calculator,
)
31
+
32
# Load Model
# Loading happens once at import time so every request shares the same weights.
model_id = "AshokGakr/model-tiny"
tokenizer = AutoTokenizer.from_pretrained(model_id)
# Use half precision only when a GPU is available: float16 matmuls are
# unsupported-or-slow (and NaN-prone) on CPU, so fall back to float32 there.
# This restores the guard the previous revision of this file had.
model = AutoModelForCausalLM.from_pretrained(
    model_id,
    torch_dtype=torch.float16 if torch.cuda.is_available() else torch.float32,
    device_map="auto",
)
36
+
37
# ReAct-style system prompt. It instructs the model to emit
# Thought / Action / Action Input lines that the /chat loop parses with
# regexes, and to terminate with a "Final Answer:" line. The tool names
# listed here must match the keys of the `tools` registry exactly.
# NOTE: this text is sent to the model verbatim — do not reformat it.
SYSTEM_PROMPT = """
ROLE: You are a ReAct Agent. You solve tasks using this loop:
Thought: (Reasoning about what to do)
Action: (Tool name: 'get_current_datetime' or 'simple_calculator')
Action Input: (Parameter for the tool)
Observation: (Result from the tool - provided to you)
... (Repeat Thought/Action/Observation if needed)
Final Answer: (The final response to the user)

AVAILABLE TOOLS:
- get_current_datetime: Use this for any questions about the current date or time. No input needed.
- simple_calculator: Use this for any math calculations. Input should be a math expression (e.g., '10 + 5').
"""
50
 
51
  @app.route('/')
52
  def index():
 
55
@app.route('/chat', methods=['POST'])
def chat():
    """POST /chat — run the ReAct agent loop and stream plain text back.

    Expects a JSON body with a "message" key holding the user's query
    (NOTE(review): the previous revision read a "messages" list — confirm
    the front-end was updated to send "message"). Streams the model's raw
    Thought/Action text plus tool observations as chunked text/plain.
    """
    data = request.json
    user_query = data.get("message", "")

    def generate_agent_response():
        # Generator yielded to Flask's Response for chunked streaming.
        # Source 13: Episodic memory maintains the conversation trajectory.
        # History starts fresh per request: system prompt + the single user query.
        history = [{"role": "system", "content": SYSTEM_PROMPT}, {"role": "user", "content": user_query}]

        for i in range(5): # Limit iterations to prevent infinite loops [5]
            # Re-tokenize the full trajectory every turn so observations are visible.
            input_ids = tokenizer.apply_chat_template(history, add_generation_prompt=True, return_tensors="pt").to(model.device)
            streamer = TextIteratorStreamer(tokenizer, skip_prompt=True, skip_special_tokens=True)

            # generate() runs in a worker thread so we can consume the streamer here.
            # NOTE(review): the thread is never join()ed; the streamer iterator
            # ending is relied on to signal completion — confirm no leak on errors.
            thread = Thread(target=model.generate, kwargs={"input_ids": input_ids, "streamer": streamer, "max_new_tokens": 256})
            thread.start()

            # Accumulate the turn's text while forwarding each chunk to the client.
            full_turn_output = ""
            for new_text in streamer:
                full_turn_output += new_text
                yield new_text # Stream thoughts to the UI [6]

            # Check for Action [7]
            # Parse the first "Action:"/"Action Input:" pair from the turn text.
            # NOTE(review): if the model emits both an Action and a Final Answer
            # in one turn, the Action branch wins and the tool still runs.
            action_match = re.search(r"Action:\s*(\w+)", full_turn_output)
            input_match = re.search(r"Action Input:\s*(.*)", full_turn_output)

            if action_match and input_match:
                tool_name = action_match.group(1).strip()
                tool_input = input_match.group(1).strip()

                if tool_name in tools:
                    # Execute the tool and stream its observation to the client too.
                    obs = tools[tool_name](tool_input)
                    yield f"\n{obs}\n"
                    # Feed observation back into history [8, 9]
                    # NOTE(review): the observation is appended with role "user"
                    # (a common workaround for templates lacking a tool role) —
                    # confirm this matches the model's chat template.
                    history.append({"role": "assistant", "content": full_turn_output})
                    history.append({"role": "user", "content": obs})
                else:
                    # Unknown tool name: stop rather than loop on bad actions.
                    break
            elif "Final Answer:" in full_turn_output:
                # Model signalled completion; its text was already streamed above.
                break
            else:
                # No action and no final answer — nothing actionable, stop.
                break

    return Response(generate_agent_response(), mimetype='text/plain')
98
 
99
if __name__ == '__main__':
    # 0.0.0.0:7860 is the standard host/port for a Hugging Face Space.
    app.run(host='0.0.0.0', port=7860)