Spaces:

amkyawdev
/

amkyaw-coder

Sleeping

amkyawdev committed on Apr 18

Commit

125cba5

verified ·

1 Parent(s): 640cc6d

Upload folder using huggingface_hub

Files changed (2) hide show

app.py CHANGED Viewed

@@ -1,6 +1,6 @@
 import gradio as gr
-from ctransformers import AutoModelForCausalLM
-import os
 # Model repo and filename
 model_repo = "amkyawdev/amkyaw-dev-v1"
@@ -12,26 +12,29 @@ llm = None
 def get_llm():
     global llm
     if llm is None:
-        # Download and load model from Hugging Face (public model, no token needed)
-        llm = AutoModelForCausalLM.from_pretrained(
-            model_repo,
-            model_file=model_filename,
-            model_type="llama",
-            context_length=1024,
-            threads=2
         )
     return llm
-def generate(prompt, temperature=0.8, max_tokens=256):
     try:
         model = get_llm()
         response = model(
             prompt,
-            max_new_tokens=max_tokens,
             temperature=temperature,
-            stop=["</s>", "assistant:"]
         )
-        return response
     except Exception as e:
         return f"Error: {str(e)}"
@@ -42,7 +45,7 @@ with gr.Blocks(title="amkyaw-coder") as demo:
         with gr.Column():
             prompt = gr.Textbox(label="Prompt", lines=4, placeholder="Enter your prompt here...")
             temperature = gr.Slider(0.1, 2.0, value=0.8, step=0.1, label="Temperature")
-            max_tokens = gr.Slider(64, 1024, value=256, step=64, label="Max Tokens")
             submit = gr.Button("Generate", variant="primary")
         with gr.Column():

 import gradio as gr
+from llama_cpp import Llama
+from huggingface_hub import hf_hub_download
 # Model repo and filename
 model_repo = "amkyawdev/amkyaw-dev-v1"
 def get_llm():
     global llm
     if llm is None:
+        model_path = hf_hub_download(
+            repo_id=model_repo,
+            filename=model_filename
+        )
+        llm = Llama(
+            model_path=model_path,
+            n_ctx=512,
+            n_threads=2,
+            n_gpu_layers=0,
+            verbose=False
         )
     return llm
+def generate(prompt, temperature=0.8, max_tokens=128):
     try:
         model = get_llm()
         response = model(
             prompt,
+            max_tokens=max_tokens,
             temperature=temperature,
+            stop=["</s>", "assistant"]
         )
+        return response["choices"][0]["text"].strip()
     except Exception as e:
         return f"Error: {str(e)}"
         with gr.Column():
             prompt = gr.Textbox(label="Prompt", lines=4, placeholder="Enter your prompt here...")
             temperature = gr.Slider(0.1, 2.0, value=0.8, step=0.1, label="Temperature")
+            max_tokens = gr.Slider(32, 512, value=128, step=32, label="Max Tokens")
             submit = gr.Button("Generate", variant="primary")
         with gr.Column():

requirements.txt CHANGED Viewed

@@ -1,3 +1,3 @@
 gradio>=4.0.0
 huggingface_hub>=0.20.0
-ctransformers>=0.2.0

 gradio>=4.0.0
 huggingface_hub>=0.20.0
+llama-cpp-python>=0.2.0