druvx13 committed on
Commit
5966b70
·
verified ·
1 Parent(s): a4116b3

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +56 -36
app.py CHANGED
@@ -1,41 +1,41 @@
1
  import gradio as gr
2
- import requests
3
  import os
4
- from huggingface_hub import hf_hub_download
5
 
6
  # Model configuration
7
  MODEL_REPO = "druvx13/gpt2-Q4_K_M-GGUF"
8
  MODEL_FILE = "gpt2-q4_k_m.gguf"
9
- SERVER_PORT = 8080
 
10
 
11
- # Download model if not exists
12
- def ensure_model():
13
- if not os.path.exists(MODEL_FILE):
14
- print("Downloading model...")
15
- hf_hub_download(repo_id=MODEL_REPO, filename=MODEL_FILE, local_dir=".")
16
- return MODEL_FILE
 
 
 
 
 
17
 
18
- # Start llama.cpp server (must be done before launching Gradio)
19
- os.system(f"./llama-server --hf-repo {MODEL_REPO} --hf-file {ensure_model()} -c 2048 &")
20
 
21
- def generate_text(prompt, max_tokens=100, temp=0.7):
22
- try:
23
- response = requests.post(
24
- f"http://localhost:{SERVER_PORT}/completion",
25
- json={
26
- "prompt": prompt,
27
- "stream": False,
28
- "temperature": temp,
29
- "n_predict": max_tokens
30
- }
31
- )
32
- return response.json()["content"]
33
- except Exception as e:
34
- return f"Error: {str(e)}. Ensure server is running."
35
 
36
- # UI Configuration
37
  with gr.Blocks(theme="soft") as demo:
38
- gr.Markdown("# GPT-2 Text Generation (GGUF Version)\nPowered by llama.cpp and HuggingFace Spaces")
39
 
40
  with gr.Row():
41
  with gr.Column():
@@ -44,16 +44,36 @@ with gr.Blocks(theme="soft") as demo:
44
  placeholder="Enter your prompt here...",
45
  lines=5
46
  )
47
- max_tokens = gr.Slider(10, 500, value=100, label="Max Output Tokens")
48
- temp = gr.Slider(0.1, 1.0, value=0.7, label="Temperature")
49
- submit = gr.Button("Generate", variant="primary")
50
-
51
- output = gr.Textbox(label="Generated Text", lines=10)
52
-
53
- submit.click(
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
54
  fn=generate_text,
55
- inputs=[prompt, max_tokens, temp],
56
  outputs=output
57
  )
58
 
59
- demo.launch(server_port=7860)
 
1
  import gradio as gr
2
+ from llama_cpp import Llama
3
  import os
 
4
 
5
  # Model configuration
6
  MODEL_REPO = "druvx13/gpt2-Q4_K_M-GGUF"
7
  MODEL_FILE = "gpt2-q4_k_m.gguf"
8
+ CACHE_DIR = "./model_cache"
9
+ MAX_TOKENS = 200
10
 
11
+ # Initialize model (loads once at startup)
12
+ def load_model():
13
+ os.makedirs(CACHE_DIR, exist_ok=True)
14
+ return Llama(
15
+ model_path=None, # Auto-download from HF
16
+ hf_repo=MODEL_REPO,
17
+ hf_file=MODEL_FILE,
18
+ n_ctx=2048, # Context length
19
+ n_threads=4, # CPU threads
20
+ verbose=False # Disable debug logs
21
+ )
22
 
23
+ llm = load_model()
 
24
 
25
+ # Generation function
26
+ def generate_text(prompt, max_tokens=MAX_TOKENS, temp=0.7, top_p=0.95):
27
+ output = llm(
28
+ prompt=prompt,
29
+ max_tokens=max_tokens,
30
+ temperature=temp,
31
+ top_p=top_p,
32
+ echo=False
33
+ )
34
+ return output["choices"][0]["text"]
 
 
 
 
35
 
36
+ # UI components
37
  with gr.Blocks(theme="soft") as demo:
38
+ gr.Markdown("# GPT2 Text Generator (GGUF Version)\nType a prompt and generate text using the quantized GPT2 model.")
39
 
40
  with gr.Row():
41
  with gr.Column():
 
44
  placeholder="Enter your prompt here...",
45
  lines=5
46
  )
47
+ max_tokens = gr.Slider(
48
+ minimum=50,
49
+ maximum=500,
50
+ value=200,
51
+ step=50,
52
+ label="Max Output Length"
53
+ )
54
+ temp = gr.Slider(
55
+ minimum=0.1,
56
+ maximum=1.0,
57
+ value=0.7,
58
+ step=0.1,
59
+ label="Temperature"
60
+ )
61
+ top_p = gr.Slider(
62
+ minimum=0.1,
63
+ maximum=1.0,
64
+ value=0.95,
65
+ step=0.05,
66
+ label="Top-p Sampling"
67
+ )
68
+
69
+ with gr.Column():
70
+ output = gr.Textbox(label="Generated Text", lines=10)
71
+ generate_btn = gr.Button("Generate", variant="primary")
72
+
73
+ generate_btn.click(
74
  fn=generate_text,
75
+ inputs=[prompt, max_tokens, temp, top_p],
76
  outputs=output
77
  )
78
 
79
+ demo.launch()