nihardon committed on
Commit
23089f1
·
verified ·
1 Parent(s): 4ab6507

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +56 -38
app.py CHANGED
@@ -1,41 +1,59 @@
1
  import gradio as gr
2
  from huggingface_hub import hf_hub_download
3
  from llama_cpp import Llama
 
 
4
 
5
- # Global variable to hold the model (starts empty)
6
  llm_model = None
 
 
7
 
8
- def load_model():
9
- """Loads the model only when needed."""
10
- global llm_model
11
 
12
- # If already loaded, just return it
13
- if llm_model is not None:
14
- return llm_model
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
15
 
16
- print("⏳ First-time load: Downloading/Loading model...")
17
-
18
- # 1. Download (Cached)
19
- model_path = hf_hub_download(
20
- repo_id="nihardon/fine-tuned-unit-test-generator",
21
- filename="llama-3-8b.Q4_K_M.gguf",
22
- )
23
-
24
- # 2. Load into RAM
25
- llm_model = Llama(
26
- model_path=model_path,
27
- n_ctx=1024, # Context window
28
- n_threads=2, # Use 2 threads for better speed
29
- verbose=False, # Reduce logs to prevent buffer lag
30
- )
31
-
32
- print("✅ Model loaded!")
33
- return llm_model
34
 
 
35
  def generate_test(user_code):
36
- # Load model (only happens once)
37
- llm = load_model()
 
 
 
38
 
 
39
  prompt = f"""Below is an instruction that describes a task, paired with an input that provides further context. Write a response that appropriately completes the request.
40
 
41
  ### Instruction:
@@ -46,19 +64,21 @@ You are an expert Python QA engineer. Write a pytest unit test for the following
46
 
47
  ### Response:
48
  """
49
- output = llm(
50
- prompt,
51
- max_tokens=512,
52
- stop=["### Instruction:", "### Input:"],
53
- echo=False
54
- )
55
- return output["choices"][0]["text"].strip()
 
 
 
56
 
57
  # --- The UI ---
58
- # This part runs instantly, so the Health Check passes immediately!
59
  with gr.Blocks(theme=gr.themes.Soft()) as demo:
60
  gr.Markdown("# 🧪 AI Unit Test Generator")
61
- gr.Markdown("*Note: The first request will take ~1 minute to initialize the model.*")
62
 
63
  with gr.Row():
64
  with gr.Column():
@@ -73,6 +93,4 @@ with gr.Blocks(theme=gr.themes.Soft()) as demo:
73
 
74
  btn.click(generate_test, inputs=input_box, outputs=output_box)
75
 
76
- # Launch
77
- print("🚀 Server starting...")
78
  demo.launch(server_name="0.0.0.0", server_port=7860)
 
1
  import gradio as gr
2
  from huggingface_hub import hf_hub_download
3
  from llama_cpp import Llama
4
+ import threading
5
+ import time
6
 
7
# --- Global State ---
# Shared between the background loader thread and the Gradio request handlers.
llm_model = None              # llama_cpp.Llama instance once loading finishes
load_status = "Starting..."   # human-readable progress string surfaced to users
is_loaded = False             # flipped to True only after llm_model is usable

# --- Background Loader ---
def load_model_in_background():
    """Download the GGUF weights and construct the Llama model off the main thread.

    Runs once in a daemon thread so the Gradio app (and the platform health
    check) can come up immediately. Progress is published through the module
    globals ``load_status`` / ``is_loaded``; failures are captured into
    ``load_status`` rather than raised out of the thread.
    """
    global llm_model, load_status, is_loaded

    try:
        print("⏳ Background thread started...")
        load_status = "⬇️ Downloading model (approx 1-2 mins)..."

        # hf_hub_download caches locally, so a restarted Space skips the download.
        gguf_path = hf_hub_download(
            repo_id="nihardon/fine-tuned-unit-test-generator",
            filename="llama-3-8b.Q4_K_M.gguf",
        )

        load_status = "🧠 Loading into RAM (approx 60s)..."
        print("Loading weights...")

        # verbose=False keeps llama.cpp from flooding the log buffer.
        model_kwargs = {
            "model_path": gguf_path,
            "n_ctx": 1024,
            "n_threads": 2,
            "verbose": False,
        }
        llm_model = Llama(**model_kwargs)

        # Order matters: is_loaded must flip only after llm_model is assigned,
        # so readers of the flag never see a half-initialized model.
        load_status = "✅ Model Ready!"
        is_loaded = True
        print("🚀 Model successfully loaded!")

    except Exception as e:
        # Surface the failure in the UI status instead of killing the thread silently.
        load_status = f"❌ Error: {str(e)}"
        print(load_status)

# Kick off loading immediately at import time; daemon=True lets the process
# exit without waiting on a stuck download.
threading.Thread(target=load_model_in_background, daemon=True).start()
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
47
 
48
+ # --- The Generator Function ---
49
  def generate_test(user_code):
50
+ global llm_model, is_loaded, load_status
51
+
52
+ # 1. Check if model is ready
53
+ if not is_loaded or llm_model is None:
54
+ return f"⚠️ SYSTEM INITIALIZING...\n\nCurrent Status: {load_status}\n\nPlease wait 60 seconds and click Generate again."
55
 
56
+ # 2. Run Generation
57
  prompt = f"""Below is an instruction that describes a task, paired with an input that provides further context. Write a response that appropriately completes the request.
58
 
59
  ### Instruction:
 
64
 
65
  ### Response:
66
  """
67
+ try:
68
+ output = llm_model(
69
+ prompt,
70
+ max_tokens=512,
71
+ stop=["### Instruction:", "### Input:"],
72
+ echo=False
73
+ )
74
+ return output["choices"][0]["text"].strip()
75
+ except Exception as e:
76
+ return f"Error during generation: {str(e)}"
77
 
78
  # --- The UI ---
 
79
  with gr.Blocks(theme=gr.themes.Soft()) as demo:
80
  gr.Markdown("# 🧪 AI Unit Test Generator")
81
+ gr.Markdown("**Status:** System starts automatically. If the model isn't ready, it will tell you.")
82
 
83
  with gr.Row():
84
  with gr.Column():
 
93
 
94
  btn.click(generate_test, inputs=input_box, outputs=output_box)
95
 
 
 
96
  demo.launch(server_name="0.0.0.0", server_port=7860)