prarabdha21 committed
Commit 331b148 · 1 Parent(s): 5d939a0
Files changed (4)
  1. .idea/vcs.xml +6 -0
  2. app.py +16 -22
  3. dockerfile +15 -9
  4. requirements.txt +3 -4
.idea/vcs.xml ADDED
@@ -0,0 +1,6 @@
+ <?xml version="1.0" encoding="UTF-8"?>
+ <project version="4">
+   <component name="VcsDirectoryMappings">
+     <mapping directory="" vcs="Git" />
+   </component>
+ </project>
app.py CHANGED
@@ -1,29 +1,23 @@
- from flask import Flask, request, jsonify
  import torch
- from transformers import AutoModelForCausalLM, AutoTokenizer

- app = Flask(__name__)

- MODEL_NAME = "meta-llama/Llama-2-7b-chat-hf"  # Ensure you have access!
- tokenizer = AutoTokenizer.from_pretrained(MODEL_NAME, use_auth_token=True)
- model = AutoModelForCausalLM.from_pretrained(
-     MODEL_NAME, torch_dtype=torch.float16, device_map="auto", use_auth_token=True
- )

- @app.route('/generate', methods=['POST'])
- def generate():
-     user_request = request.json.get("query")
-     if not user_request:
-         return jsonify({"error": "No query provided"}), 400

-     mongo_query = generate_mongo_query(user_request)
-     return jsonify({"mongo_query": mongo_query})

- def generate_mongo_query(user_request):
-     prompt = f"Convert this request to a MongoDB query: {user_request}"
-     inputs = tokenizer(prompt, return_tensors="pt").to("cuda" if torch.cuda.is_available() else "cpu")
-     output = model.generate(**inputs, max_length=150)
-     return tokenizer.decode(output[0], skip_special_tokens=True)

- if __name__ == '__main__':
-     app.run(host='0.0.0.0', port=7860)

+ from fastapi import FastAPI
+ from transformers import AutoTokenizer, AutoModelForCausalLM
  import torch

+ app = FastAPI()

+ # Load Llama 2 model
+ MODEL_NAME = "meta-llama/Llama-2-7b-chat-hf"
+ tokenizer = AutoTokenizer.from_pretrained(MODEL_NAME)
+ model = AutoModelForCausalLM.from_pretrained(MODEL_NAME, torch_dtype=torch.float16, device_map="auto")

+ @app.post("/generate")
+ async def generate_text(data: dict):
+     prompt = data.get("prompt", "")
+     if not prompt:
+         return {"error": "No prompt provided"}

+     inputs = tokenizer(prompt, return_tensors="pt").to("cuda")
+     output = model.generate(**inputs, max_length=200)
+     response = tokenizer.decode(output[0], skip_special_tokens=True)

+     return {"generated_text": response}
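Two details of the new app.py are easy to trip over: the use_auth_token=True arguments were dropped, so the gated Llama 2 weights will only load if a token is otherwise available to the process (for example an HF_TOKEN secret on the Space), and the request tensors are moved to "cuda" unconditionally, which raises a RuntimeError on CPU-only hardware even though the model itself is placed with device_map="auto". A device-safe variant of the handler might look like the sketch below; this is an illustration under those assumptions, not part of the commit:

    import torch

    @app.post("/generate")
    async def generate_text(data: dict):
        prompt = data.get("prompt", "")
        if not prompt:
            return {"error": "No prompt provided"}

        # Follow the model's actual placement instead of hard-coding "cuda";
        # model.device reflects where accelerate put the weights.
        inputs = tokenizer(prompt, return_tensors="pt").to(model.device)
        with torch.no_grad():  # inference only, no gradients needed
            output = model.generate(**inputs, max_length=200)
        return {"generated_text": tokenizer.decode(output[0], skip_special_tokens=True)}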
 
dockerfile CHANGED
@@ -1,17 +1,23 @@
- # Use an official lightweight Python image
- FROM python:3.10

  # Set working directory
  WORKDIR /app

- # Copy files to container
- COPY app.py requirements.txt /app/

- # Install dependencies
- RUN pip install --no-cache-dir -r requirements.txt

- # Expose port for Flask
  EXPOSE 7860

- # Run the Flask app
- CMD ["python", "app.py"]

+ # Use a Python base image
+ FROM python:3.9
+
+ # Create a user and set up environment
+ RUN useradd -m -u 1000 user
+ USER user
+ ENV PATH="/home/user/.local/bin:$PATH"

  # Set working directory
  WORKDIR /app

+ # Copy and install dependencies
+ COPY --chown=user ./requirements.txt requirements.txt
+ RUN pip install --no-cache-dir --upgrade -r requirements.txt

+ # Copy the application code
+ COPY --chown=user . /app

+ # Expose port (optional, usually HF Spaces handles this)
  EXPOSE 7860

+ # Run the application
+ CMD ["uvicorn", "app:app", "--host", "0.0.0.0", "--port", "7860"]
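The rewritten Dockerfile follows the usual Hugging Face Spaces pattern: run as a non-root user (UID 1000), install dependencies as that user, and serve with uvicorn on port 7860, the port Spaces exposes by default. Once the container is running, the endpoint can be exercised with a small standard-library client like the sketch below; the localhost URL and the example prompt are assumptions for local testing, not part of the commit:

    import json
    import urllib.request

    # Assumed local address; on a deployed Space, use the Space's public URL instead.
    URL = "http://localhost:7860/generate"

    payload = json.dumps({"prompt": "Write one sentence about MongoDB."}).encode("utf-8")
    req = urllib.request.Request(URL, data=payload, headers={"Content-Type": "application/json"})

    with urllib.request.urlopen(req) as resp:
        print(json.load(resp)["generated_text"])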
requirements.txt CHANGED
@@ -1,6 +1,5 @@
- flask
- torch
  transformers
  accelerate
- sentencepiece
- huggingface_hub

  transformers
+ torch
  accelerate
+ fastapi
+ uvicorn
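One dependency change worth double-checking: dropping huggingface_hub is harmless, since transformers already depends on it, but dropping sentencepiece is safe only if AutoTokenizer resolves to the fast, Rust-backed Llama tokenizer (the slow Python fallback imports sentencepiece). A quick sanity check, assuming access to the gated meta-llama repo (e.g. via an HF_TOKEN secret):

    from transformers import AutoTokenizer

    # Resolves to the fast tokenizer when tokenizer.json ships with the repo;
    # the slow fallback (LlamaTokenizer) would require the sentencepiece package.
    tok = AutoTokenizer.from_pretrained("meta-llama/Llama-2-7b-chat-hf")
    print(type(tok).__name__)  # expected: LlamaTokenizerFast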