teamaMohamed115 commited on
Commit
b4ce389
·
verified ·
1 Parent(s): b33a6b7

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +78 -15
app.py CHANGED
@@ -1,22 +1,85 @@
1
import gradio as gr
import torch
from transformers import AutoTokenizer, AutoModelForSequenceClassification

model_name = "teamaMohamed115/smollm-360m-code-lora"

tokenizer = AutoTokenizer.from_pretrained(model_name)
model = AutoModelForSequenceClassification.from_pretrained(model_name)
# Inference-only demo: disable dropout so repeated calls on the same text
# give the same probabilities.
model.eval()


def predict(text):
    """Return a {class-index: probability} mapping for *text*.

    Keys are stringified class indices ("0", "1", ...); values are softmax
    probabilities, as expected by gr.Label.
    """
    inputs = tokenizer(text, return_tensors="pt")
    # No gradients needed at inference time — avoids building the autograd
    # graph on every request.
    with torch.no_grad():
        outputs = model(**inputs)
    logits = outputs.logits.softmax(dim=-1)
    return {str(i): float(logits[0][i]) for i in range(len(logits[0]))}


iface = gr.Interface(
    fn=predict,
    inputs=gr.Textbox(lines=3, label="Enter text"),
    outputs=gr.Label(label="Predictions"),
    title="My Model Demo"
)

iface.launch()
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
  import gradio as gr
2
+ import os
3
+ import torch
4
+ from transformers import AutoTokenizer, AutoModelForCausalLM
5
 
 
6
 
7
# Model id is overridable via env var so the Space can be repointed without
# a code change.
MODEL_ID = os.environ.get("HF_MODEL_ID", "teamaMohamed115/smollm-360m-code-lora")
DEVICE = "cuda" if torch.cuda.is_available() else "cpu"

print(f"Loading tokenizer and model from {MODEL_ID} on {DEVICE}")

tokenizer = AutoTokenizer.from_pretrained(MODEL_ID, use_fast=True)

# Some checkpoints ship custom modeling code, so try trusting it first and
# fall back to the stock architecture. Log the failure instead of swallowing
# it silently so real problems (auth, network) show up in the Space logs.
# NOTE(review): trust_remote_code executes code from the model repo — only
# acceptable because MODEL_ID defaults to our own checkpoint; confirm before
# allowing arbitrary HF_MODEL_ID values.
try:
    model = AutoModelForCausalLM.from_pretrained(MODEL_ID, trust_remote_code=True)
except Exception as err:
    print(f"trust_remote_code load failed ({err!r}); retrying with stock architecture")
    model = AutoModelForCausalLM.from_pretrained(MODEL_ID)

model.to(DEVICE)
model.eval()  # inference only: disable dropout

# SmolLM/Llama-style tokenizers often define no pad token; generate() needs
# one to build padding/attention masks — reuse EOS when missing.
if tokenizer.pad_token_id is None:
    tokenizer.pad_token = tokenizer.eos_token

# Default generation settings; per-request values override these in
# generate_code().
GEN_KWARGS = dict(
    max_new_tokens=256,
    do_sample=True,
    temperature=0.2,
    top_p=0.95,
    top_k=50,
    num_return_sequences=1,
)
36
+
37
+
38
# Prompt wrapper fed to the model; {instruction} is filled in per request.
PROMPT_TEMPLATE = (
    "# Instruction:\n"
    "{instruction}\n"
    "\n"
    "# Response (provide a Python module with multiple functions):\n"
)
41
 
42
+
43
+
44
+
45
def generate_code(instruction: str, max_tokens: int = 256, temperature: float = 0.2, top_p: float = 0.95):
    """Generate Python code for *instruction* using the loaded causal LM.

    Args:
        instruction: Natural-language task description; blank input short-circuits.
        max_tokens: Upper bound on newly generated tokens.
        temperature: Sampling temperature; values <= 0 fall back to greedy decoding.
        top_p: Nucleus-sampling cutoff.

    Returns:
        The stripped model completion, or a short help message when the
        instruction is empty.
    """
    if not instruction.strip():
        return "Please provide an instruction or problem statement."

    prompt = PROMPT_TEMPLATE.format(instruction=instruction.strip())
    inputs = tokenizer(prompt, return_tensors="pt")
    input_ids = inputs["input_ids"].to(DEVICE)
    attention_mask = inputs.get("attention_mask")
    if attention_mask is not None:
        attention_mask = attention_mask.to(DEVICE)

    gen_kwargs = GEN_KWARGS.copy()
    gen_kwargs.update({
        "max_new_tokens": int(max_tokens),
        "temperature": float(temperature),
        "top_p": float(top_p),
    })
    # HF sampling raises on temperature <= 0; switch to greedy decoding
    # instead of crashing the request.
    if gen_kwargs["temperature"] <= 0:
        gen_kwargs["do_sample"] = False
        del gen_kwargs["temperature"]

    with torch.no_grad():
        outputs = model.generate(input_ids=input_ids, attention_mask=attention_mask, **gen_kwargs)

    # Decode only the newly generated tokens. Slicing by input length is
    # robust where the previous startswith(prompt) check silently leaked the
    # whole prompt whenever the tokenizer round-trip did not reproduce it
    # byte-for-byte.
    generated_ids = outputs[0][input_ids.shape[1]:]
    return tokenizer.decode(generated_ids, skip_special_tokens=True).strip()
79
+
80
+
81
+
82
+
83
# Wire the generator into the UI. The previous Blocks body contained only a
# Markdown header, so generate_code was never reachable from the page.
with gr.Blocks(title="SmolLM Python Code Assistant") as demo:
    gr.Markdown("# SmolLM — Python Code Generation\nEnter an instruction and get a multi-function Python module.")
    instruction_box = gr.Textbox(lines=4, label="Instruction")
    with gr.Row():
        max_tokens_slider = gr.Slider(16, 1024, value=256, step=1, label="Max new tokens")
        temperature_slider = gr.Slider(0.0, 1.5, value=0.2, step=0.05, label="Temperature")
        top_p_slider = gr.Slider(0.1, 1.0, value=0.95, step=0.01, label="Top-p")
    generate_btn = gr.Button("Generate")
    output_box = gr.Code(label="Generated code", language="python")
    generate_btn.click(
        generate_code,
        inputs=[instruction_box, max_tokens_slider, temperature_slider, top_p_slider],
        outputs=output_box,
    )

demo.launch()