moeid526 committed on
Commit
def5cc8
·
verified ·
1 Parent(s): f955d67

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +29 -22
app.py CHANGED
@@ -1,41 +1,48 @@
1
- from flask import Flask, request, jsonify
2
- from transformers import AutoModelForCausalLM, AutoTokenizer
3
  import torch
 
 
4
 
5
  app = Flask(__name__)
6
 
7
- # Load model with memory optimizations
8
- model_name = "tanusrich/Mental_Health_Chatbot"
9
 
 
10
  device = "cuda" if torch.cuda.is_available() else "cpu"
11
 
12
  # Load tokenizer
13
- tokenizer = AutoTokenizer.from_pretrained(model_name)
14
-
15
- # Load model with reduced memory usage
16
- model = AutoModelForCausalLM.from_pretrained(
17
- model_name,
18
- device_map="auto", # Automatically assign layers to CPU/GPU
19
- torch_dtype=torch.float16, # Use less memory with half-precision
20
- low_cpu_mem_usage=True, # Optimize RAM usage
21
- max_memory={device: "4GB"} # Limit model memory usage
22
- ).to(device)
 
 
23
 
24
  @app.route("/chat", methods=["POST"])
25
  def chat():
26
- user_input = request.json.get("message", "")
 
27
 
28
  if not user_input:
29
  return jsonify({"error": "Message is required"}), 400
30
 
 
31
  inputs = tokenizer(user_input, return_tensors="pt").to(device)
32
-
33
- with torch.no_grad(): # Disable gradient calculation to save memory
34
- output = model.generate(**inputs, max_length=100)
35
 
36
- response = tokenizer.decode(output[0], skip_special_tokens=True)
37
-
38
- return jsonify({"response": response})
 
 
 
 
 
39
 
40
  if __name__ == "__main__":
41
- app.run(debug=True, port=5000)
 
 
 
1
import torch
from transformers import AutoModelForCausalLM, AutoTokenizer
from flask import Flask, request, jsonify

app = Flask(__name__)

# Model name from Hugging Face
MODEL_NAME = "tanusrich/Mental_Health_Chatbot"

# Detect if GPU is available
device = "cuda" if torch.cuda.is_available() else "cpu"

# Load tokenizer
tokenizer = AutoTokenizer.from_pretrained(MODEL_NAME)

# Load model with optimized settings
try:
    # AutoModelForCausalLM (not AutoModel): the bare AutoModel has no
    # language-modeling head, so model.generate() in /chat would fail.
    model = AutoModelForCausalLM.from_pretrained(
        MODEL_NAME,
        device_map="auto",        # accelerate places layers on the best available device
        low_cpu_mem_usage=True,   # stream weights to reduce peak RAM during load
    )
    # NOTE: no trailing .to(device) — it conflicts with device_map="auto",
    # which has already dispatched the model (and raises for offloaded layers).
except Exception as e:
    # Fail fast at startup: serving requests without a model is pointless.
    print(f"Error loading model: {e}")
    raise SystemExit(1)
27
@app.route("/chat", methods=["POST"])
def chat():
    """Generate a chatbot reply for a JSON payload of the form {"message": "..."}.

    Returns:
        200 with {"response": <generated text>} on success,
        400 with {"error": "Message is required"} when the message is
        missing, empty, or the request body is not valid JSON.
    """
    # get_json(silent=True) returns None instead of raising on a missing or
    # malformed JSON body, so bad requests get a clean 400 rather than a 500.
    data = request.get_json(silent=True) or {}
    user_input = data.get("message", "")

    if not user_input:
        return jsonify({"error": "Message is required"}), 400

    # Tokenize the prompt and move the tensors to the model's device.
    inputs = tokenizer(user_input, return_tensors="pt").to(device)

    # Inference only — disable gradient tracking to save memory.
    with torch.no_grad():
        # max_length caps prompt + generated tokens combined.
        outputs = model.generate(**inputs, max_length=150)

    # Decode the full sequence (prompt + continuation) to plain text.
    response_text = tokenizer.decode(outputs[0], skip_special_tokens=True)

    return jsonify({"response": response_text})
46
 
47
# Entry point: serve the API on all interfaces so the container/host can reach it.
if __name__ == "__main__":
    app.run(port=5000, host="0.0.0.0")