Mayur74 committed on
Commit
59291d3
·
verified ·
1 Parent(s): 3f1e08f

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +17 -58
app.py CHANGED
@@ -1,64 +1,23 @@
1
"""FastAPI inference service for the fine-tuned TinyLlama model.

Exposes a liveness route at "/" and a text-generation route at
"/generate" backed by a causal LM pulled from the Hugging Face Hub.
"""
from fastapi import FastAPI
from pydantic import BaseModel
from transformers import AutoTokenizer, AutoModelForCausalLM
import torch

MODEL_NAME = "Mayur74/tinyllama-finetune-mayur"

app = FastAPI()

# -----------------------------
# LOAD MODEL
# -----------------------------
print("Loading model...")

# NOTE: `use_auth_token` is deprecated in recent transformers releases;
# `token=True` is the supported spelling and reads the cached HF token.
tokenizer = AutoTokenizer.from_pretrained(
    MODEL_NAME,
    token=True
)

model = AutoModelForCausalLM.from_pretrained(
    MODEL_NAME,
    torch_dtype=torch.float16,  # half precision: smaller memory footprint for inference
    device_map="auto",          # place weights on GPU automatically when one is available
    token=True
)

print("Model loaded successfully.")


# -----------------------------
# INPUT SCHEMA
# -----------------------------
class InputData(BaseModel):
    """Request body for POST /generate."""
    prompt: str
    max_new_tokens: int = 150   # cap on generated length
    temperature: float = 0.7    # sampling temperature (used with do_sample=True)


# -----------------------------
# HOME ROUTE
# -----------------------------
@app.get("/")
def home():
    """Liveness probe: confirms the API process is up."""
    return {"status": "running", "message": "TinyLlama API Online 🚀"}


# -----------------------------
# GENERATION ENDPOINT
# -----------------------------
@app.post("/generate")
def generate_text(data: InputData):
    """Generate a completion for `data.prompt` and return it as JSON."""
    # Move the tokenized inputs to whatever device the model weights live on.
    inputs = tokenizer(data.prompt, return_tensors="pt").to(model.device)

    output = model.generate(
        **inputs,
        max_new_tokens=data.max_new_tokens,
        temperature=data.temperature,
        do_sample=True
    )

    result = tokenizer.decode(output[0], skip_special_tokens=True)

    return {"response": result}
 
1
+ import gradio as gr
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
2
 
3
+ def generate(prompt, max_new_tokens=80, temperature=0.7):
4
+ inputs = tokenizer(prompt, return_tensors="pt").to(model.device)
5
  output = model.generate(
6
  **inputs,
7
+ max_new_tokens=max_new_tokens,
8
+ temperature=temperature,
9
  do_sample=True
10
  )
11
+ return tokenizer.decode(output[0], skip_special_tokens=True)
12
+
13
+ iface = gr.Interface(
14
+ fn=generate,
15
+ inputs=[
16
+ gr.Textbox(label="Prompt"),
17
+ gr.Slider(1, 500, value=80, label="Max New Tokens"),
18
+ gr.Slider(0, 1, value=0.7, label="Temperature")
19
+ ],
20
+ outputs="text"
21
+ )
22
 
23
+ iface.launch()