druvx13 committed on
Commit
5966b70
·
verified ·
1 Parent(s): a4116b3

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +56 -36
app.py CHANGED
@@ -1,41 +1,41 @@
1
  import gradio as gr
2
- import requests
3
  import os
4
- from huggingface_hub import hf_hub_download
5
 
6
  # Model configuration
7
  MODEL_REPO = "druvx13/gpt2-Q4_K_M-GGUF"
8
  MODEL_FILE = "gpt2-q4_k_m.gguf"
9
- SERVER_PORT = 8080
 
10
 
11
- # Download model if not exists
12
- def ensure_model():
13
- if not os.path.exists(MODEL_FILE):
14
- print("Downloading model...")
15
- hf_hub_download(repo_id=MODEL_REPO, filename=MODEL_FILE, local_dir=".")
16
- return MODEL_FILE
 
 
 
 
 
17
 
18
- # Start llama.cpp server (must be done before launching Gradio)
19
- os.system(f"./llama-server --hf-repo {MODEL_REPO} --hf-file {ensure_model()} -c 2048 &")
20
 
21
- def generate_text(prompt, max_tokens=100, temp=0.7):
22
- try:
23
- response = requests.post(
24
- f"http://localhost:{SERVER_PORT}/completion",
25
- json={
26
- "prompt": prompt,
27
- "stream": False,
28
- "temperature": temp,
29
- "n_predict": max_tokens
30
- }
31
- )
32
- return response.json()["content"]
33
- except Exception as e:
34
- return f"Error: {str(e)}. Ensure server is running."
35
 
36
- # UI Configuration
37
  with gr.Blocks(theme="soft") as demo:
38
- gr.Markdown("# GPT-2 Text Generation (GGUF Version)\nPowered by llama.cpp and HuggingFace Spaces")
39
 
40
  with gr.Row():
41
  with gr.Column():
@@ -44,16 +44,36 @@ with gr.Blocks(theme="soft") as demo:
44
  placeholder="Enter your prompt here...",
45
  lines=5
46
  )
47
- max_tokens = gr.Slider(10, 500, value=100, label="Max Output Tokens")
48
- temp = gr.Slider(0.1, 1.0, value=0.7, label="Temperature")
49
- submit = gr.Button("Generate", variant="primary")
50
-
51
- output = gr.Textbox(label="Generated Text", lines=10)
52
-
53
- submit.click(
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
54
  fn=generate_text,
55
- inputs=[prompt, max_tokens, temp],
56
  outputs=output
57
  )
58
 
59
- demo.launch(server_port=7860)
 
1
  import gradio as gr
2
+ from llama_cpp import Llama
3
  import os
 
4
 
5
  # Model configuration
6
  MODEL_REPO = "druvx13/gpt2-Q4_K_M-GGUF"
7
  MODEL_FILE = "gpt2-q4_k_m.gguf"
8
+ CACHE_DIR = "./model_cache"
9
+ MAX_TOKENS = 200
10
 
11
+ # Initialize model (loads once at startup)
12
+ def load_model():
13
+ os.makedirs(CACHE_DIR, exist_ok=True)
14
+ return Llama(
15
+ model_path=None, # Auto-download from HF
16
+ hf_repo=MODEL_REPO,
17
+ hf_file=MODEL_FILE,
18
+ n_ctx=2048, # Context length
19
+ n_threads=4, # CPU threads
20
+ verbose=False # Disable debug logs
21
+ )
22
 
23
+ llm = load_model()
 
24
 
25
+ # Generation function
26
+ def generate_text(prompt, max_tokens=MAX_TOKENS, temp=0.7, top_p=0.95):
27
+ output = llm(
28
+ prompt=prompt,
29
+ max_tokens=max_tokens,
30
+ temperature=temp,
31
+ top_p=top_p,
32
+ echo=False
33
+ )
34
+ return output["choices"][0]["text"]
 
 
 
 
35
 
36
+ # UI components
37
  with gr.Blocks(theme="soft") as demo:
38
+ gr.Markdown("# GPT2 Text Generator (GGUF Version)\nType a prompt and generate text using the quantized GPT2 model.")
39
 
40
  with gr.Row():
41
  with gr.Column():
 
44
  placeholder="Enter your prompt here...",
45
  lines=5
46
  )
47
+ max_tokens = gr.Slider(
48
+ minimum=50,
49
+ maximum=500,
50
+ value=200,
51
+ step=50,
52
+ label="Max Output Length"
53
+ )
54
+ temp = gr.Slider(
55
+ minimum=0.1,
56
+ maximum=1.0,
57
+ value=0.7,
58
+ step=0.1,
59
+ label="Temperature"
60
+ )
61
+ top_p = gr.Slider(
62
+ minimum=0.1,
63
+ maximum=1.0,
64
+ value=0.95,
65
+ step=0.05,
66
+ label="Top-p Sampling"
67
+ )
68
+
69
+ with gr.Column():
70
+ output = gr.Textbox(label="Generated Text", lines=10)
71
+ generate_btn = gr.Button("Generate", variant="primary")
72
+
73
+ generate_btn.click(
74
  fn=generate_text,
75
+ inputs=[prompt, max_tokens, temp, top_p],
76
  outputs=output
77
  )
78
 
79
+ demo.launch()