Spaces:
Sleeping
Sleeping
Antoine KRAJNC committed on
Commit ·
a28f0e9
1
Parent(s): 9b36c98
solve readme
Browse files- .DS_Store +0 -0
- Dockerfile +14 -0
- README.md +9 -12
- __pycache__/app.cpython-312.pyc +0 -0
- __pycache__/app.cpython-39.pyc +0 -0
- app.py +164 -0
- data/articles.csv +16 -0
- requirements.txt +13 -0
- run.sh +10 -0
- salary_predictor/MLProject +15 -0
- salary_predictor/data/Salary_Data.csv +31 -0
- salary_predictor/docker/dockerfile +26 -0
- salary_predictor/docker/requirements.txt +12 -0
- salary_predictor/model.joblib +0 -0
- salary_predictor/run.sh +10 -0
- salary_predictor/train.py +43 -0
- secrets.sh +6 -0
- test.py +10 -0
- test_heroku.py +10 -0
.DS_Store
ADDED
|
Binary file (6.15 kB). View file
|
|
|
Dockerfile
ADDED
|
@@ -0,0 +1,14 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# Container image for the FastAPI demo app.
FROM continuumio/miniconda3

WORKDIR /home/app

RUN apt-get update -y
RUN apt-get install nano unzip curl -y

# Install Python dependencies before copying the source so this layer is cached
# across code-only rebuilds.
COPY requirements.txt /dependencies/requirements.txt
RUN pip install -r /dependencies/requirements.txt

COPY . /home/app

# $PORT is injected by the hosting platform (e.g. HF Spaces / Heroku) at runtime.
CMD fastapi run app.py --port $PORT
# CMD gunicorn app:app --bind 0.0.0.0:$PORT --worker-class uvicorn.workers.UvicornWorker
|
README.md
CHANGED
|
@@ -1,12 +1,9 @@
|
|
| 1 |
-
|
| 2 |
-
|
| 3 |
-
|
| 4 |
-
|
| 5 |
-
|
| 6 |
-
|
| 7 |
-
|
| 8 |
-
|
| 9 |
-
|
| 10 |
-
---
|
| 11 |
-
|
| 12 |
-
Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference
|
|
|
|
| 1 |
+
# Fast API Demo App
|
| 2 |
+
|
| 3 |
+
Welcome to [Jedha](https://jedha.co) fast api demo app. Simply `git clone` this repository to check out how it works. You can play around with it!
|
| 4 |
+
|
| 5 |
+
Check out deployed app here:
|
| 6 |
+
|
| 7 |
+
* [Jedha demo api]()
|
| 8 |
+
|
| 9 |
+
Enjoy 😎
|
|
|
|
|
|
|
|
|
__pycache__/app.cpython-312.pyc
ADDED
|
Binary file (6.42 kB). View file
|
|
|
__pycache__/app.cpython-39.pyc
ADDED
|
Binary file (4.8 kB). View file
|
|
|
app.py
ADDED
|
@@ -0,0 +1,164 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import mlflow
|
| 2 |
+
import uvicorn
|
| 3 |
+
import pandas as pd
|
| 4 |
+
from pydantic import BaseModel
|
| 5 |
+
from typing import Literal, List, Union
|
| 6 |
+
from fastapi import FastAPI, File, UploadFile
|
| 7 |
+
import joblib
|
| 8 |
+
|
| 9 |
+
# Markdown description rendered on the auto-generated OpenAPI docs page (`/docs`).
description = """
Welcome to Jedha demo API. This app is made for you to understand how FastAPI works! Try it out 🕹️

## Introduction Endpoints

Here are two endpoints you can try:
* `/`: **GET** request that display a simple default message.
* `/greetings`: **GET** request that display a "hello message"

## Blog Endpoints

Imagine this API deals with blog articles. With the following endpoints, you can retrieve and create blog posts
* `/blog-articles/{blog_id}`: **GET** request that retrieve a blog article given a `blog_id` as `int`.
* `/create-blog-article`: POST request that creates a new article

## Machine Learning

This is a Machine Learning endpoint that predict salary given some years of experience. Here is the endpoint:

* `/predict` that accepts `floats`


Check out documentation below 👇 for more information on each endpoint.
"""

# Tag groups shown in the OpenAPI docs; the "name" values must match the
# `tags=[...]` arguments used on the route decorators below.
tags_metadata = [
    {
        "name": "Introduction Endpoints",
        "description": "Simple endpoints to try out!",
    },

    {
        "name": "Blog Endpoints",
        "description": "More complex endpoints that deals with actual data with **GET** and **POST** requests."
    },

    {
        "name": "Machine Learning",
        "description": "Prediction Endpoint."
    }
]

# Application object picked up by the ASGI server (uvicorn / `fastapi run`).
app = FastAPI(
    title="🪐 Jedha Demo API",
    description=description,
    version="0.1",
    contact={
        "name": "Jedha",
        "url": "https://jedha.co",
    },
    openapi_tags=tags_metadata
)
|
| 61 |
+
|
| 62 |
+
class BlogArticles(BaseModel):
    """Request body for `/create-blog-article`: one blog post to append."""
    title: str
    content: str
    author: str = "Anonymous Author"  # byline used when the caller omits it
|
| 66 |
+
|
| 67 |
+
class PredictionFeatures(BaseModel):
    """Request body for `/predict`: the model's single input feature."""
    YearsExperience: float  # years of professional experience
|
| 69 |
+
|
| 70 |
+
@app.get("/", tags=["Introduction Endpoints"])
async def index():
    """Return the API's default welcome message."""
    # Plain string body; FastAPI serializes it as JSON.
    return "Hello world! This `/` is the most simple and default endpoint. If you want to learn more, check out documentation of the api at `/docs`"
|
| 77 |
+
|
| 78 |
+
@app.get("/greetings", tags=["Introduction Endpoints"])
async def greetings(name: str="Mr (or Miss) Nobody"):
    """Say hi to anybody who's specifying their name as query parameter."""
    # Build and return the greeting payload in one step.
    return {"Message": f"Hello {name} How are you today?"}
|
| 87 |
+
|
| 88 |
+
|
| 89 |
+
@app.get("/blog-articles/{blog_id}", tags=["Blog Endpoints"])
async def read_blog_article(blog_id: int):
    """
    Retrieve one blog article by its 0-based position in the articles CSV.

    Returns a dict with the article's `title`, `content` and `author`,
    or a `msg` entry when `blog_id` is out of range.

    >👋 Careful, if you change the file using `/create-blog-article` right before, the new dataframe is not right away available, you will access a previous version.
    """
    # Load the article "database" once (the original fetched the CSV twice).
    articles = pd.read_csv("https://full-stack-assets.s3.eu-west-3.amazonaws.com/Deployment/articles.csv")

    # `iloc` is 0-based, so valid positions are 0 .. len(articles) - 1.
    # The original `blog_id > len(articles)` check let blog_id == len(articles)
    # (IndexError) and negative ids slip through to `iloc`.
    if not 0 <= blog_id < len(articles):
        response = {
            "msg": "We don't have that many articles!"
        }
    else:
        article = articles.iloc[blog_id, :]
        response = {
            "title": article.title,
            "content": article.content,
            "author": article.author
        }

    return response
|
| 111 |
+
|
| 112 |
+
|
| 113 |
+
@app.post("/create-blog-article", tags=["Blog Endpoints"])
async def create_blog_article(blog_article: BlogArticles):
    """
    Append a new blog article into the database which is a `.csv` file.

    Returns the updated table serialized as JSON.
    """
    df = pd.read_csv("https://full-stack-assets.s3.eu-west-3.amazonaws.com/Deployment/articles.csv")
    new_article = pd.Series({
        'id': len(df)+1,
        'title': blog_article.title,
        'content': blog_article.content,
        'author': blog_article.author
    })

    # `DataFrame.append` was removed in pandas 2.0; `pd.concat` is the
    # supported way to add a row (same result: the series becomes one new row).
    df = pd.concat([df, new_article.to_frame().T], ignore_index=True)
    # NOTE(review): writes the whole table back to S3, index column included,
    # matching the original behavior.
    df.to_csv('s3://full-stack-assets/Deployment/articles.csv')

    return df.to_json()
|
| 130 |
+
|
| 131 |
+
@app.post("/post-picture", tags=["Blog Endpoints"])
async def post_picture(file: UploadFile= File(...)):
    """Upload a picture and read its file name."""
    # Only the filename is echoed back; the file body itself is not stored.
    response = {"picture": file.filename}
    return response
|
| 137 |
+
|
| 138 |
+
|
| 139 |
+
@app.post("/predict", tags=["Machine Learning"])
async def predict(predictionFeatures: PredictionFeatures):
    """
    Prediction of salary for a given year of experience!

    Returns {"prediction": <value>} for the single submitted feature.
    """
    # Read data: wrap the single feature into the one-column frame the model
    # was trained on (column name must stay "YearsExperience").
    years_experience = pd.DataFrame({"YearsExperience": [predictionFeatures.YearsExperience]})

    # Log model from mlflow — hard-coded run id; requires MLFLOW_TRACKING_URI
    # (and artifact-store credentials) to be configured in the environment.
    logged_model = 'runs:/5e54b2ee620546b0914c9e9fbfd18875/salary_estimator'

    # Load model as a PyFuncModel.
    # NOTE(review): the model is re-fetched on every request; consider loading
    # it once at startup and caching it.
    loaded_model = mlflow.pyfunc.load_model(logged_model)

    # If you want to load model persisted locally
    #loaded_model = joblib.load('salary_predictor/model.joblib')

    prediction = loaded_model.predict(years_experience)

    # Format response — tolist()[0] unwraps the single-row prediction.
    response = {"prediction": prediction.tolist()[0]}
    return response
|
| 161 |
+
|
| 162 |
+
|
| 163 |
+
if __name__ == "__main__":
    # Run a local development server when the module is executed directly.
    uvicorn.run(app, host="0.0.0.0", port=4000)
|
data/articles.csv
ADDED
|
@@ -0,0 +1,16 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
id,title,content,author
|
| 2 |
+
1,API,"An application programming interface (API) is a connection between computers or between computer programs. It is a type of software interface, offering a service to other pieces of software.[1] A document or standard that describes how to build or use such a connection or interface is called an API specification. A computer system that meets this standard is said to implement or expose an API. The term API may refer either to the specification or to the implementation.
|
| 3 |
+
|
| 4 |
+
In contrast to a user interface, which connects a computer to a person, an application programming interface connects computers or pieces of software to each other. It is not intended to be used directly by a person (the end user) other than a computer programmer who is incorporating it into software. An API is often made up of different parts which act as tools or services that are available to the programmer. A program or a programmer that uses one of these parts is said to call that portion of the API. The calls that make up the API are also known as subroutines, methods, requests, or endpoints. An API specification defines these calls, meaning that it explains how to use or implement them.
|
| 5 |
+
|
| 6 |
+
One purpose of APIs is to hide the internal details of how a system works, exposing only those parts a programmer will find useful and keeping them consistent even if the internal details later change. An API may be custom-built for a particular pair of systems, or it may be a shared standard allowing interoperability among many systems.
|
| 7 |
+
|
| 8 |
+
The term API is often used to refer to web APIs,[2] which allow communication between computers that are joined by the internet. There are also APIs for programming languages, software libraries, computer operating systems, and computer hardware. APIs originated in the 1940s, though the term did not emerge until the 1960s and 70s.",Wikipedia
|
| 9 |
+
2,Data Science,"Data science is an interdisciplinary field that uses scientific methods, processes, algorithms and systems to extract knowledge and insights from noisy, structured and unstructured data,[1][2] and apply knowledge and actionable insights from data across a broad range of application domains. Data science is related to data mining, machine learning and big data.
|
| 10 |
+
|
| 11 |
+
Data science is a ""concept to unify statistics, data analysis, informatics, and their related methods"" in order to ""understand and analyze actual phenomena"" with data.[3] It uses techniques and theories drawn from many fields within the context of mathematics, statistics, computer science, information science, and domain knowledge. However, data science is different from computer science and information science. Turing Award winner Jim Gray imagined data science as a ""fourth paradigm"" of science (empirical, theoretical, computational, and now data-driven) and asserted that ""everything about science is changing because of the impact of information technology"" and the data deluge.[4][5]
|
| 12 |
+
|
| 13 |
+
A data scientist is someone who creates programming code, and combines it with statistical knowledge to create insights from data.",Wikipedia
|
| 14 |
+
3,Cybersecurity,"Cybersecurity is the practice of protecting systems, networks, and programs from digital attacks. These cyberattacks are usually aimed at accessing, changing, or destroying sensitive information; extorting money from users; or interrupting normal business processes.
|
| 15 |
+
|
| 16 |
+
Implementing effective cybersecurity measures is particularly challenging today because there are more devices than people, and attackers are becoming more innovative.",Cisco
|
requirements.txt
ADDED
|
@@ -0,0 +1,13 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
fastapi[standard]
|
| 2 |
+
#uvicorn[standard]
|
| 3 |
+
pydantic
|
| 4 |
+
typing
|
| 5 |
+
pandas
|
| 6 |
+
#gunicorn
|
| 7 |
+
openpyxl
|
| 8 |
+
mlflow
|
| 9 |
+
boto3
|
| 10 |
+
scikit-learn
|
| 11 |
+
python-multipart
|
| 12 |
+
fsspec
|
| 13 |
+
s3fs
|
run.sh
ADDED
|
@@ -0,0 +1,10 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# Run the API image locally, mounting the working tree over /home/app and
# forwarding the MLflow/AWS configuration from the host environment.
# Expects the image to have been built/tagged as "api".
docker run -it \
-v "$(pwd):/home/app" \
-p 7860:7860 \
-e PORT=7860 \
-e MLFLOW_TRACKING_URI=$MLFLOW_TRACKING_URI \
-e AWS_ACCESS_KEY_ID=$AWS_ACCESS_KEY_ID \
-e AWS_SECRET_ACCESS_KEY=$AWS_SECRET_ACCESS_KEY \
-e BACKEND_STORE_URI=$BACKEND_STORE_URI \
-e ARTIFACT_ROOT=$ARTIFACT_ROOT \
api
|
salary_predictor/MLProject
ADDED
|
@@ -0,0 +1,15 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# MLflow project definition: runs train.py inside the
# jedha/salary_estimator Docker image (built from docker/dockerfile).
name: salary_estimator
docker_env:
  image: jedha/salary_estimator
  volumes: ["$(pwd):/home/app"]
  # Host environment variables forwarded into the container.
  environment: [
    "MLFLOW_TRACKING_URI",
    "AWS_ACCESS_KEY_ID",
    "AWS_SECRET_ACCESS_KEY",
    "BACKEND_STORE_URI",
    "ARTIFACT_ROOT",
    "MLFLOW_EXPERIMENT_NAME"
  ]
entry_points:
  main:
    command: "python train.py"
|
salary_predictor/data/Salary_Data.csv
ADDED
|
@@ -0,0 +1,31 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
YearsExperience,Salary
|
| 2 |
+
1.1,39343.00
|
| 3 |
+
1.3,46205.00
|
| 4 |
+
1.5,37731.00
|
| 5 |
+
2.0,43525.00
|
| 6 |
+
2.2,39891.00
|
| 7 |
+
2.9,56642.00
|
| 8 |
+
3.0,60150.00
|
| 9 |
+
3.2,54445.00
|
| 10 |
+
3.2,64445.00
|
| 11 |
+
3.7,57189.00
|
| 12 |
+
3.9,63218.00
|
| 13 |
+
4.0,55794.00
|
| 14 |
+
4.0,56957.00
|
| 15 |
+
4.1,57081.00
|
| 16 |
+
4.5,61111.00
|
| 17 |
+
4.9,67938.00
|
| 18 |
+
5.1,66029.00
|
| 19 |
+
5.3,83088.00
|
| 20 |
+
5.9,81363.00
|
| 21 |
+
6.0,93940.00
|
| 22 |
+
6.8,91738.00
|
| 23 |
+
7.1,98273.00
|
| 24 |
+
7.9,101302.00
|
| 25 |
+
8.2,113812.00
|
| 26 |
+
8.7,109431.00
|
| 27 |
+
9.0,105582.00
|
| 28 |
+
9.5,116969.00
|
| 29 |
+
9.6,112635.00
|
| 30 |
+
10.3,122391.00
|
| 31 |
+
10.5,121872.00
|
salary_predictor/docker/dockerfile
ADDED
|
@@ -0,0 +1,26 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# MLflow tracking-server image for the salary predictor.
FROM continuumio/miniconda3

WORKDIR /home/app

RUN apt-get update
# -y added: without it apt-get prompts for confirmation and aborts
# in a non-interactive docker build.
RUN apt-get install nano unzip -y
RUN apt install curl -y

# Deta CLI (deployment helper).
RUN curl -fsSL https://get.deta.dev/cli.sh | sh

# AWS CLI v2, used to reach the S3 artifact store.
RUN curl "https://awscli.amazonaws.com/awscli-exe-linux-x86_64.zip" -o "awscliv2.zip"
RUN unzip awscliv2.zip
RUN ./aws/install

COPY requirements.txt /dependencies/requirements.txt
RUN pip install -r /dependencies/requirements.txt

# NOTE(review): ENV with $VAR at build time only resolves if matching ARG
# values are supplied; otherwise these become empty strings — confirm intent.
ENV AWS_ACCESS_KEY_ID=$AWS_ACCESS_KEY_ID
ENV AWS_SECRET_ACCESS_KEY=$AWS_SECRET_ACCESS_KEY
ENV BACKEND_STORE_URI=$BACKEND_STORE_URI
ENV ARTIFACT_ROOT=$ARTIFACT_ROOT

# Start the MLflow tracking server on the platform-provided $PORT.
CMD mlflow server -p $PORT \
--host 0.0.0.0 \
--backend-store-uri $BACKEND_STORE_URI \
--default-artifact-root $ARTIFACT_ROOT
|
salary_predictor/docker/requirements.txt
ADDED
|
@@ -0,0 +1,12 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
boto3
|
| 2 |
+
pandas
|
| 3 |
+
gunicorn
|
| 4 |
+
streamlit
|
| 5 |
+
scikit-learn
|
| 6 |
+
matplotlib
|
| 7 |
+
seaborn
|
| 8 |
+
plotly
|
| 9 |
+
mlflow
|
| 10 |
+
psycopg2-binary
|
| 11 |
+
jupyter
|
| 12 |
+
openpyxl
|
salary_predictor/model.joblib
ADDED
|
Binary file (942 Bytes). View file
|
|
|
salary_predictor/run.sh
ADDED
|
@@ -0,0 +1,10 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# Run the training image locally, mounting the working tree over /home/app
# and forwarding the MLflow/AWS configuration from the host environment.
# A space now precedes every line-continuation backslash: `-it\` followed by
# a newline joins with the next token and can fuse adjacent arguments.
docker run -it \
-v "$(pwd):/home/app" \
-e MLFLOW_TRACKING_URI=$MLFLOW_TRACKING_URI \
-e PORT=4000 \
-e AWS_ACCESS_KEY_ID=$AWS_ACCESS_KEY_ID \
-e AWS_SECRET_ACCESS_KEY=$AWS_SECRET_ACCESS_KEY \
-e BACKEND_STORE_URI=$BACKEND_STORE_URI \
-e ARTIFACT_ROOT=$ARTIFACT_ROOT \
-e MLFLOW_EXPERIMENT_NAME=$MLFLOW_EXPERIMENT_NAME \
salary_estimator
|
salary_predictor/train.py
ADDED
|
@@ -0,0 +1,43 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import pandas as pd
|
| 2 |
+
import time
|
| 3 |
+
import mlflow
|
| 4 |
+
from mlflow.models.signature import infer_signature
|
| 5 |
+
from sklearn.model_selection import train_test_split
|
| 6 |
+
from sklearn.linear_model import LinearRegression
|
| 7 |
+
import joblib
|
| 8 |
+
|
| 9 |
+
|
| 10 |
+
if __name__ == "__main__":

    print("training model...")

    # Time execution
    start_time = time.time()

    # Call mlflow autolog so params/metrics are recorded automatically
    mlflow.sklearn.autolog(log_models=False) # We won't log models right away

    # Import dataset (relative path: run from the salary_predictor/ directory)
    df = pd.read_csv("data/Salary_Data.csv")

    # X, y split
    X = df.loc[:, ["YearsExperience"]]
    y = df.loc[:, ["Salary"]]

    # Train / test split — no random_state is set, so the split (and the
    # fitted model) differs between runs
    X_train, X_test, y_train, y_test = train_test_split(X, y, test_size = 0.3)

    # Log experiment to MLFlow
    with mlflow.start_run() as run:
        model = LinearRegression()
        model.fit(X_train, y_train)
        # NOTE(review): `predictions` is never used below — confirm whether
        # it can be removed
        predictions = model.predict(X_train)

        # Log model separately to have more flexibility on setup
        mlflow.sklearn.log_model(sk_model=model, artifact_path="salary_estimator")

        # If you want to persist model locally
        #joblib.dump(model, "model.joblib")

    print("...Done!")
    print(f"---Total training time: {time.time()-start_time}")
|
secrets.sh
ADDED
|
@@ -0,0 +1,6 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# Environment configuration for training/serving. The quoted values below
# (except the experiment name) are placeholders — replace them with real
# credentials before sourcing this file: `source secrets.sh`.
export MLFLOW_EXPERIMENT_NAME="salary_estimator";
export MLFLOW_TRACKING_URI="MLFLOW_TRACKING_URI";
export AWS_ACCESS_KEY_ID="AWS_ACCESS_KEY_ID";
export AWS_SECRET_ACCESS_KEY="AWS_SECRET_ACCESS_KEY";
export BACKEND_STORE_URI="BACKEND_STORE_URI";
export ARTIFACT_ROOT="ARTIFACT_ROOT";
|
test.py
ADDED
|
@@ -0,0 +1,10 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
"""Smoke-test the /predict endpoint of a locally running API (port 4000)."""
import requests

# The original script also built a blog-article `payload` dict that was never
# sent (and whose "Author" key did not match the API's lowercase `author`
# field); it has been removed as dead code.
r = requests.post("http://localhost:4000/predict", json={
    "YearsExperience": 0
})
print(r.content)
|
test_heroku.py
ADDED
|
@@ -0,0 +1,10 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
"""Smoke-test the /predict endpoint of the Heroku-deployed API."""
import requests

# The original script also built a blog-article `payload` dict that was never
# sent (and whose "Author" key did not match the API's lowercase `author`
# field); it has been removed as dead code.
r = requests.post("https://aurelie-fast-api.herokuapp.com/predict", json={
    "YearsExperience": 0
})
print(r.content)
|