SALEETAI committed on
Commit
3dcd671
·
verified ·
1 Parent(s): 7d70258

Create app.py

Browse files
Files changed (1) hide show
  1. app.py +50 -0
app.py ADDED
@@ -0,0 +1,50 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import gradio as gr
2
+ from unsloth import FastLanguageModel
3
+
4
# Step 1: fetch the fine-tuned checkpoint. The first run can take about a
# minute. 4-bit loading is requested so the model fits on smaller hardware.
model, tokenizer = FastLanguageModel.from_pretrained(
    model_name="SALEETAI/coding-agent-qwen-sft",  # Your Model ID
    max_seq_length=2048,
    load_in_4bit=True,
)
# Switch the loaded model over to Unsloth's inference path before serving.
FastLanguageModel.for_inference(model)
12
+
13
+ # 2. Define the Chat Function
14
def _history_to_messages(history):
    """Convert Gradio chat history into a list of role/content dicts.

    Accepts both history layouts Gradio may hand to a chat function:
    ``(user_msg, bot_msg)`` pairs and ``{"role": ..., "content": ...}``
    dicts. Entries with missing (``None``) or non-text content are skipped
    so they never reach the chat template.
    """
    messages = []
    for turn in history or []:
        if isinstance(turn, dict):
            role = turn.get("role")
            content = turn.get("content")
            if role in ("user", "assistant") and isinstance(content, str):
                messages.append({"role": role, "content": content})
            continue

        user_msg, bot_msg = turn
        if user_msg is not None:
            messages.append({"role": "user", "content": user_msg})
        if bot_msg is not None:
            messages.append({"role": "assistant", "content": bot_msg})
    return messages


def chat_with_model(message, history):
    """Generate the assistant's reply to ``message`` given prior ``history``.

    Args:
        message: The newest user message (text).
        history: Earlier turns as supplied by ``gr.ChatInterface``; either
            ``(user, assistant)`` pairs or role/content dicts. May be empty.

    Returns:
        The decoded text of the newly generated tokens only (the prompt is
        stripped). The whole reply is returned at once; it is not streamed.
    """
    messages = _history_to_messages(history)
    messages.append({"role": "user", "content": message})

    # Render the conversation with the tokenizer's chat template. Asking for a
    # dict gives us the attention mask alongside the input ids, and moving the
    # batch to the model's device avoids a CPU/accelerator tensor mismatch.
    inputs = tokenizer.apply_chat_template(
        messages,
        tokenize=True,
        add_generation_prompt=True,
        return_tensors="pt",
        return_dict=True,
    ).to(model.device)

    outputs = model.generate(
        **inputs,
        max_new_tokens=512,
        use_cache=True,
    )

    # generate() returns prompt + continuation; keep only the continuation.
    prompt_length = inputs["input_ids"].shape[-1]
    return tokenizer.decode(outputs[0][prompt_length:], skip_special_tokens=True)
40
+
41
# Step 3: wire the chat function into a Gradio chat UI.
demo = gr.ChatInterface(
    fn=chat_with_model,
    title="Code Agent Qwen-7B (SFT)",
    description="Ask me to write Python, SQL, or C++ code!",
    examples=[
        "Write a Python function to reverse a string.",
        "Fix this bug: for i in list: list.remove(i)",
    ],
)

# Only start the web server when this file is executed as a script.
if __name__ == "__main__":
    demo.launch()