Karlgorithm committed on
Commit
a80221c
·
verified ·
1 Parent(s): d3f2cd1

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +32 -20
app.py CHANGED
@@ -1,40 +1,51 @@
1
- import requests
2
  import gradio as gr
 
 
3
 
4
# Minimal Configuration
# Endpoint of a local OpenAI-compatible chat-completions server
# (presumably a llama.cpp / similar server on port 8080 — verify against deployment).
API_URL = "http://127.0.0.1:8080/v1/chat/completions"

# Store the last prompt (written by generate(), read by regenerate())
last_prompt = ""
9
 
10
-
11
def generate(prompt):
    """Send *prompt* to the local chat-completions API and return the reply text.

    Also records the prompt in the module-level ``last_prompt`` so that
    ``regenerate`` can replay it later. On any failure (network error,
    malformed response) the exception text is returned instead of raising.
    """
    global last_prompt
    last_prompt = prompt

    payload = {
        "model": "TinyLlama-1.1B-Chat-v1.0",
        "messages": [{"role": "user", "content": prompt}],
        "max_tokens": 256
    }
    try:
        reply = requests.post(API_URL, json=payload, timeout=100)
        # Extract the assistant message from the OpenAI-style response body.
        return reply.json()['choices'][0]['message']['content']
    except Exception as e:
        return f"Error: {str(e)}"
28
 
29
-
30
def regenerate():
    """Re-run generation with the most recently stored prompt, if any."""
    if not last_prompt:
        return "No previous prompt to regenerate"
    return generate(last_prompt)
35
 
36
-
37
- # Styled UI with improved organization
38
  with gr.Blocks(title="Karlson Achegeba GPT", theme=gr.themes.Soft(primary_hue="blue")) as app:
39
  with gr.Column(elem_classes=["center-container"]):
40
  # Header Section
@@ -85,9 +96,9 @@ with gr.Blocks(title="Karlson Achegeba GPT", theme=gr.themes.Soft(primary_hue="b
85
  # Event Handlers
86
  submit.click(fn=generate, inputs=prompt, outputs=output)
87
  regenerate_btn.click(fn=regenerate, outputs=output)
88
- clear_btn.click(fn=lambda: ("", ""), outputs=[prompt, output]) # Clears both input and output
89
 
90
- # Custom CSS
91
  app.css = """
92
  .center-container {
93
  max-width: 800px;
@@ -131,4 +142,5 @@ with gr.Blocks(title="Karlson Achegeba GPT", theme=gr.themes.Soft(primary_hue="b
131
  gap: 10px;
132
  }
133
  """
134
- app.launch(server_port=7860, share=True)
 
 
 
1
  import gradio as gr
2
+ from transformers import AutoModelForCausalLM, AutoTokenizer
3
+ import torch
4
 
5
# Load model and tokenizer
# NOTE(review): runs at import time — loading weights is heavy disk (and
# possibly network) I/O that happens before the UI starts serving.
model_name = "TinyLlama/TinyLlama-1.1B-Chat-v1.0"
tokenizer = AutoTokenizer.from_pretrained(model_name)
model = AutoModelForCausalLM.from_pretrained(model_name)

# Store the last prompt (written by generate(), read by regenerate())
last_prompt = ""
12
 
 
13
def generate(prompt):
    """Generate a chat reply for *prompt* with the local TinyLlama model.

    The prompt is remembered in the module-level ``last_prompt`` so that
    ``regenerate()`` can replay it. On any failure the exception text is
    returned as the output string instead of raising.
    """
    global last_prompt
    last_prompt = prompt

    try:
        # Wrap the single user turn in the model's chat template.
        messages = [{"role": "user", "content": prompt}]
        input_ids = tokenizer.apply_chat_template(
            messages,
            return_tensors="pt"
        ).to(model.device)

        # Inference only: no_grad avoids building autograd state,
        # cutting memory use during generation.
        with torch.no_grad():
            outputs = model.generate(
                input_ids,
                max_new_tokens=256,
                do_sample=True,
                temperature=0.7,
                top_p=0.9,
                # Explicit pad token silences the "pad_token_id not set"
                # warning for models without one (falls back to EOS).
                pad_token_id=tokenizer.eos_token_id
            )

        # Strip the echoed prompt; decode only the newly generated tokens.
        new_tokens = outputs[0][input_ids.shape[-1]:]
        return tokenizer.decode(new_tokens, skip_special_tokens=True)

    except Exception as e:
        return f"Error: {str(e)}"
41
 
 
42
  def regenerate():
43
  """Regenerate response using last prompt"""
44
  if last_prompt:
45
  return generate(last_prompt)
46
  return "No previous prompt to regenerate"
47
 
48
+ # Create Gradio interface
 
49
  with gr.Blocks(title="Karlson Achegeba GPT", theme=gr.themes.Soft(primary_hue="blue")) as app:
50
  with gr.Column(elem_classes=["center-container"]):
51
  # Header Section
 
96
  # Event Handlers
97
  submit.click(fn=generate, inputs=prompt, outputs=output)
98
  regenerate_btn.click(fn=regenerate, outputs=output)
99
+ clear_btn.click(fn=lambda: ("", ""), outputs=[prompt, output])
100
 
101
+ # Custom CSS (same as before)
102
  app.css = """
103
  .center-container {
104
  max-width: 800px;
 
142
  gap: 10px;
143
  }
144
  """
145
+
146
+ app.launch()