ranggafermata committed on
Commit
4d1219f
Β·
verified Β·
1 Parent(s): 0c15224

Create app.py

Browse files
Files changed (1) hide show
  1. app.py +43 -0
app.py ADDED
@@ -0,0 +1,43 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
import os
import re

import gradio as gr
import torch
from huggingface_hub import login
from transformers import AutoModelForCausalLM, AutoTokenizer

# Authenticate with the Hugging Face Hub so gated/private models can be
# pulled. HF_TOKEN must be set in the environment (e.g. a Space secret);
# `os` was previously used here without being imported, crashing at startup.
login(token=os.getenv("HF_TOKEN"))
9
+
10
# Load the model and tokenizer once at startup.
model_name = "ranggafermata/Fermata-v1.2-lightcoder"
tokenizer = AutoTokenizer.from_pretrained(model_name)
# Half precision on GPU to cut memory; full fp32 when running on CPU.
_dtype = torch.float16 if torch.cuda.is_available() else torch.float32
model = AutoModelForCausalLM.from_pretrained(model_name, torch_dtype=_dtype)
# Inference-only mode (disables dropout etc.).
model.eval()
16
def generate_code(prompt, max_tokens, temperature, top_p):
    """Sample a completion for *prompt* from the loaded model.

    Args:
        prompt: Input text to complete.
        max_tokens: Maximum number of new tokens to generate.
        temperature: Sampling temperature.
        top_p: Nucleus-sampling probability mass.

    Returns:
        The decoded generation (prompt included), special tokens stripped.
    """
    encoded = tokenizer(prompt, return_tensors="pt").to(model.device)
    # No gradients needed for inference.
    with torch.no_grad():
        generated = model.generate(
            **encoded,
            do_sample=True,
            max_new_tokens=max_tokens,
            temperature=temperature,
            top_p=top_p,
            # Models without a pad token fall back to EOS for padding.
            pad_token_id=tokenizer.eos_token_id,
        )
    return tokenizer.decode(generated[0], skip_special_tokens=True)
28
+
29
# Gradio Interface
_prompt_input = gr.Textbox(lines=5, label="Prompt", placeholder="Write your prompt here...")
_max_tokens_input = gr.Slider(minimum=10, maximum=512, value=128, step=8, label="Max Tokens")
_temperature_input = gr.Slider(minimum=0.1, maximum=1.5, value=0.8, step=0.1, label="Temperature")
_top_p_input = gr.Slider(minimum=0.1, maximum=1.0, value=0.95, step=0.05, label="Top-p (nucleus sampling)")

iface = gr.Interface(
    fn=generate_code,
    inputs=[_prompt_input, _max_tokens_input, _temperature_input, _top_p_input],
    outputs=gr.Textbox(lines=20, label="Generated Code"),
    title="Fermata v1.2 LightCoder",
    description="Code generator powered by TinyLlama fine-tuned on math/code tasks.",
)

# Serve the UI; mcp_server=True also exposes the function as an MCP tool.
iface.launch(mcp_server=True)