suneeldk committed
Commit d7ce295 · verified · 1 Parent(s): 390f54a

Update app.py

Files changed (1): app.py (+13 -9)
app.py CHANGED
@@ -2,17 +2,22 @@ import gradio as gr
 import json
 import spaces
 import torch
-from unsloth import FastLanguageModel
+from transformers import AutoModelForCausalLM, AutoTokenizer
+from peft import PeftModel
 
 # ── Load model once at startup ──────────────────────────────
-MODEL_ID = "suneeldk/json-extract"
+BASE_MODEL = "Qwen/Qwen2.5-1.5B-Instruct"
+LORA_MODEL = "YOUR_USERNAME/json-extract"  # ← change this
 
-model, tokenizer = FastLanguageModel.from_pretrained(
-    MODEL_ID,
-    max_seq_length=2048,
-    load_in_4bit=True,
+tokenizer = AutoTokenizer.from_pretrained(LORA_MODEL)
+
+base_model = AutoModelForCausalLM.from_pretrained(
+    BASE_MODEL,
+    torch_dtype=torch.float16,
+    device_map="auto",
 )
-FastLanguageModel.for_inference(model)
+model = PeftModel.from_pretrained(base_model, LORA_MODEL)
+model.eval()
 
 
 # ── Inference function ──────────────────────────────────────
@@ -29,7 +34,7 @@ def extract(text, schema_text):
         return "Invalid JSON schema. Please check the format."
 
     prompt = f"### Input: {text}\n### Schema: {json.dumps(schema)}\n### Output:"
-    inputs = tokenizer(prompt, return_tensors="pt").to("cuda")
+    inputs = tokenizer(prompt, return_tensors="pt").to(model.device)
 
     with torch.no_grad():
         outputs = model.generate(
@@ -38,7 +43,6 @@ def extract(text, schema_text):
             temperature=0.1,
             do_sample=True,
             pad_token_id=tokenizer.eos_token_id,
-            use_cache=False,
         )
 
     result = tokenizer.decode(outputs[0], skip_special_tokens=True)
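
For readers following the migration: below is the post-commit load-and-generate path assembled from the `+` lines above, as a minimal standalone sketch. The Gradio UI, the `spaces` (ZeroGPU) wiring, and the elided `model.generate` arguments are not shown in the diff and are left out here; `YOUR_USERNAME/json-extract` is the placeholder repo ID taken from the commit itself.

```python
import torch
from transformers import AutoModelForCausalLM, AutoTokenizer
from peft import PeftModel

BASE_MODEL = "Qwen/Qwen2.5-1.5B-Instruct"
LORA_MODEL = "YOUR_USERNAME/json-extract"  # placeholder from the diff

# Tokenizer comes from the adapter repo, so it matches whatever the
# LoRA fine-tune saved alongside the weights.
tokenizer = AutoTokenizer.from_pretrained(LORA_MODEL)

# Load the fp16 base model, let accelerate choose device placement,
# then attach the LoRA adapter with PEFT.
base_model = AutoModelForCausalLM.from_pretrained(
    BASE_MODEL,
    torch_dtype=torch.float16,
    device_map="auto",
)
model = PeftModel.from_pretrained(base_model, LORA_MODEL)
model.eval()  # inference mode: disables dropout

# Inputs follow the model instead of a hard-coded "cuda" string,
# the same change extract() makes with .to(model.device).
prompt = "### Input: ...\n### Schema: ...\n### Output:"
inputs = tokenizer(prompt, return_tensors="pt").to(model.device)
```

The `.to(model.device)` switch is the part that pays off on Spaces: with `device_map="auto"` the model can sit on CPU when no GPU is visible, and the old hard-coded `"cuda"` would fail in that state.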
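
An optional follow-up the commit does not take: PEFT can fold the adapter into the base weights so generation skips the LoRA indirection entirely. A sketch, assuming the `model` built above:

```python
# merge_and_unload() bakes the LoRA deltas into the base weights and
# returns a plain transformers model; model.device and model.generate()
# behave the same, so extract() would work unchanged.
model = model.merge_and_unload()
```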