Spaces:

mrpoons-studio
/

DeepSeek-R1

Runtime error

App Files Files Community

mrpoons-studio committed on Feb 13, 2025

Commit

65a509c

verified ·

1 Parent(s): 27a0eb7

Upload app.py

Browse files

Files changed (1) hide show

app.py +63 -27

app.py CHANGED Viewed

@@ -2,7 +2,9 @@ import os
 from threading import Thread
 from typing import Iterator
 import gradio as gr
-import spaces, torch, requests
 from transformers import AutoModelForCausalLM, AutoTokenizer, TextIteratorStreamer
 MAX_MAX_NEW_TOKENS = 2048
@@ -16,11 +18,8 @@ This space demonstrates model [DeepSeek-R1](https://huggingface.co/deepseek-ai/d
 **You can also try our R1 model in [official homepage](https://r1.deepseek.com/chat).**
 """
-if not torch.cuda.is_available():
-    DESCRIPTION += "\n<p>Running on CPU 🥶 This demo does not work on CPU.</p>"
-if torch.cuda.is_available():
-    model_id = "deepseek-ai/deepseek-r1"
 if torch.cuda.is_available():
     model = AutoModelForCausalLM.from_pretrained(
         model_id, torch_dtype=torch.bfloat16, device_map="auto"
@@ -31,37 +30,56 @@ tokenizer = AutoTokenizer.from_pretrained(model_id)
 tokenizer.use_default_system_prompt = False
 @spaces.GPU
-def generate(message: str,
-             chat_history: list[tuple[str, str]],
-             system_prompt: str,
-             max_new_tokens: int = 2048,
-             temperature: float = 0,
-             top_p: float = 0,
-             top_k: int = 50,
-             repetition_penalty: float = 2,
-             search_query: str = "") -> Iterator[str]:
     conversation = []
     if system_prompt:
         conversation.append({"role": "system", "content": system_prompt})
     if search_query:
         try:
-            r = requests.get(f"https://api.duckduckgo.com/?q={search_query}&format=json", timeout=5)
             data = r.json()
             result = data.get("AbstractText", "")
             if result:
-                conversation.append({"role": "system", "content": f"Search results for '{search_query}': {result}"})
         except Exception as e:
-            conversation.append({"role": "system", "content": f"Search error: {e}"})
     for user, assistant in chat_history:
-        conversation.extend([{"role": "user", "content": user},
-                             {"role": "assistant", "content": assistant}])
     conversation.append({"role": "user", "content": message})
     input_ids = tokenizer.apply_chat_template(conversation, return_tensors="pt")
     if input_ids.shape[1] > MAX_INPUT_TOKEN_LENGTH:
         input_ids = input_ids[:, -MAX_INPUT_TOKEN_LENGTH:]
-        gr.Warning(f"Trimmed input from conversation as it was longer than {MAX_INPUT_TOKEN_LENGTH} tokens.")
     input_ids = input_ids.to(model.device)
-    streamer = TextIteratorStreamer(tokenizer, timeout=10.0, skip_prompt=True, skip_special_tokens=True)
     generate_kwargs = {
         "input_ids": input_ids,
         "streamer": streamer,
@@ -71,7 +89,7 @@ def generate(message: str,
         "top_k": top_k,
         "num_beams": 1,
         "repetition_penalty": repetition_penalty,
-        "eos_token_id": 32021
     }
     t = Thread(target=model.generate, kwargs=generate_kwargs)
     t.start()
@@ -84,17 +102,35 @@ chat_interface = gr.ChatInterface(
     fn=generate,
     additional_inputs=[
         gr.Textbox(label="System prompt", lines=6),
-        gr.Slider(label="Max new tokens", minimum=0, maximum=MAX_MAX_NEW_TOKENS, step=0.01, value=DEFAULT_MAX_NEW_TOKENS),
-        gr.Slider(label="Top-p (nucleus sampling)", minimum=0, maximum=1.0, step=0.01, value=0),
         gr.Slider(label="Top-k", minimum=1, maximum=1000, step=0.01, value=50),
-        gr.Slider(label="Repetition penalty", minimum=1.0, maximum=2.0, step=0.01, value=2),
-        gr.Textbox(label="Search Query (Optional)", placeholder="Enter search query to fetch online info", lines=1)
     ],
     stop_btn=gr.Button("Stop"),
     examples=[
         ["implement snake game using pygame"],
         ["Can you explain briefly to me what is the Python programming language?"],
-        ["write a program to find the factorial of a number"]
     ],
 )

 from threading import Thread
 from typing import Iterator
 import gradio as gr
+import spaces
+import torch
+import requests
 from transformers import AutoModelForCausalLM, AutoTokenizer, TextIteratorStreamer
 MAX_MAX_NEW_TOKENS = 2048
 **You can also try our R1 model in [official homepage](https://r1.deepseek.com/chat).**
 """
+model_id = "deepseek-ai/deepseek-r1"
 if torch.cuda.is_available():
     model = AutoModelForCausalLM.from_pretrained(
         model_id, torch_dtype=torch.bfloat16, device_map="auto"
 tokenizer.use_default_system_prompt = False
 @spaces.GPU
+def generate(
+    message: str,
+    chat_history: list[tuple[str, str]],
+    system_prompt: str,
+    max_new_tokens: int = 2048,
+    temperature: float = 0,
+    top_p: float = 0,
+    top_k: int = 50,
+    repetition_penalty: float = 2,
+    search_query: str = "",
+) -> Iterator[str]:
     conversation = []
     if system_prompt:
         conversation.append({"role": "system", "content": system_prompt})
     if search_query:
         try:
+            r = requests.get(
+                f"https://api.duckduckgo.com/?q={search_query}&format=json", timeout=5
+            )
             data = r.json()
             result = data.get("AbstractText", "")
             if result:
+                conversation.append(
+                    {
+                        "role": "system",
+                        "content": f"Search results for '{search_query}': {result}",
+                    }
+                )
         except Exception as e:
+            conversation.append(
+                {"role": "system", "content": f"Search error: {e}"}
+            )
     for user, assistant in chat_history:
+        conversation.extend(
+            [
+                {"role": "user", "content": user},
+                {"role": "assistant", "content": assistant},
+            ]
+        )
     conversation.append({"role": "user", "content": message})
     input_ids = tokenizer.apply_chat_template(conversation, return_tensors="pt")
     if input_ids.shape[1] > MAX_INPUT_TOKEN_LENGTH:
         input_ids = input_ids[:, -MAX_INPUT_TOKEN_LENGTH:]
+        gr.Warning(
+            f"Trimmed input from conversation as it was longer than {MAX_INPUT_TOKEN_LENGTH} tokens."
+        )
     input_ids = input_ids.to(model.device)
+    streamer = TextIteratorStreamer(
+        tokenizer, timeout=10.0, skip_prompt=True, skip_special_tokens=True
+    )
     generate_kwargs = {
         "input_ids": input_ids,
         "streamer": streamer,
         "top_k": top_k,
         "num_beams": 1,
         "repetition_penalty": repetition_penalty,
+        "eos_token_id": 32021,
     }
     t = Thread(target=model.generate, kwargs=generate_kwargs)
     t.start()
     fn=generate,
     additional_inputs=[
         gr.Textbox(label="System prompt", lines=6),
+        gr.Slider(
+            label="Max new tokens",
+            minimum=0,
+            maximum=MAX_MAX_NEW_TOKENS,
+            step=0.01,
+            value=DEFAULT_MAX_NEW_TOKENS,
+        ),
+        gr.Slider(
+            label="Top-p (nucleus sampling)",
+            minimum=0,
+            maximum=1.0,
+            step=0.01,
+            value=0,
+        ),
         gr.Slider(label="Top-k", minimum=1, maximum=1000, step=0.01, value=50),
+        gr.Slider(
+            label="Repetition penalty", minimum=1.0, maximum=2.0, step=0.01, value=2
+        ),
+        gr.Textbox(
+            label="Search Query (Optional)",
+            placeholder="Enter search query to fetch online info",
+            lines=1,
+        ),
     ],
     stop_btn=gr.Button("Stop"),
     examples=[
         ["implement snake game using pygame"],
         ["Can you explain briefly to me what is the Python programming language?"],
+        ["write a program to find the factorial of a number"],
     ],
 )