Deepak Perla committed
Commit 9684b82 · 1 Parent(s): c655670

Added app.py and Dockerfile

Files changed (4)
  1. Dockerfile +9 -3
  2. MedAI-LLM/README.md +10 -0
  3. app.py +15 -3
  4. requirements.txt +3 -0
Dockerfile CHANGED
@@ -1,8 +1,14 @@
-FROM python:3.9
+FROM python:3.10
 
 WORKDIR /app
-COPY . /app
 
+# Install dependencies
+COPY requirements.txt .
 RUN pip install -r requirements.txt
 
-CMD ["python", "app.py"]
+# Copy app files
+COPY . .
+
+# Expose port & run app
+EXPOSE 7860
+CMD ["python", "app.py"]
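The reworked Dockerfile copies requirements.txt and installs dependencies before copying the rest of the source, so Docker caches the pip layer and code-only changes rebuild quickly; EXPOSE 7860 matches the default port Hugging Face expects from a Docker Space. To try the image locally, a typical sequence (illustrative, not part of this commit, and assuming the truncated app.run(...) line binds to the exposed port) would be `docker build -t medai-llm .` followed by `docker run -p 7860:7860 medai-llm`.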
MedAI-LLM/README.md ADDED
@@ -0,0 +1,10 @@
+---
+title: MedAI LLM
+emoji: 🏆
+colorFrom: purple
+colorTo: gray
+sdk: docker
+pinned: false
+---
+
+Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference
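This front matter is the standard Hugging Face Spaces configuration block: `sdk: docker` tells the Hub to build the Space from the repository's Dockerfile rather than a managed Gradio or Streamlit runtime, while the title, emoji, and color keys only affect the Space card's appearance.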
app.py CHANGED
@@ -1,12 +1,24 @@
 from flask import Flask, request, jsonify
+from transformers import AutoModelForCausalLM, AutoTokenizer
+import torch
 
 app = Flask(__name__)
 
+# Load base model and tokenizer
+MODEL_NAME = "meta-llama/Llama-2-7b-chat-hf"  # Change this to your base model
+tokenizer = AutoTokenizer.from_pretrained(MODEL_NAME)
+model = AutoModelForCausalLM.from_pretrained(MODEL_NAME, torch_dtype=torch.float16, device_map="auto")
+
 @app.route("/predict", methods=["POST"])
 def predict():
-    data = request.get_json()
-    user_input = data.get("input", "")
-    response = f"Generated text for: {user_input}"  # Dummy response
+    data = request.json
+    input_text = data.get("text", "")
+
+    # Tokenize input
+    inputs = tokenizer(input_text, return_tensors="pt").to("cuda")
+    outputs = model.generate(**inputs, max_length=200)
+    response = tokenizer.decode(outputs[0], skip_special_tokens=True)
+
     return jsonify({"response": response})
 
 if __name__ == "__main__":
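As committed, the handler moves tokenized inputs to "cuda" explicitly, so the endpoint requires a CUDA-capable GPU at runtime, and `device_map="auto"` additionally depends on the `accelerate` package, which this commit does not add to requirements.txt. For quick local testing, a minimal client sketch (not part of the commit) could look like the following; it assumes the server is reachable on localhost:7860, the port exposed by the Dockerfile, and the prompt is purely illustrative:

import requests

# Hypothetical smoke test for the /predict endpoint (not from the repository).
# Assumes the Flask app is running and reachable on localhost:7860.
resp = requests.post(
    "http://localhost:7860/predict",
    json={"text": "List common symptoms of iron-deficiency anemia."},
)
resp.raise_for_status()
print(resp.json()["response"])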
requirements.txt CHANGED
@@ -1,3 +1,6 @@
 flask
 torch
 transformers
+gunicorn
+gradio
+
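gunicorn and gradio are declared here but nothing in this commit imports or invokes them: app.py uses only flask, torch, and transformers, and the Dockerfile still starts the Flask development server via `python app.py`. If gunicorn were wired in later, a conventional entry point (an assumption, not part of this commit) would be `gunicorn -b 0.0.0.0:7860 app:app`.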