Spaces:

mrpoons-studio
/

DeepSeek-R1

Runtime error

App Files Community

mrpoons-studio committed on Feb 13, 2025

Commit

7b81a62

verified ·

1 Parent(s): 79739eb

Upload 2 files

Browse files

Files changed (2) hide show

app.py +39 -77
requirements.txt +4 -4

app.py CHANGED Viewed

@@ -1,16 +1,12 @@
 import os
 from threading import Thread
 from typing import Iterator
 import gradio as gr
-import spaces
-import torch
 from transformers import AutoModelForCausalLM, AutoTokenizer, TextIteratorStreamer
 MAX_MAX_NEW_TOKENS = 2048
 DEFAULT_MAX_NEW_TOKENS = 2048
-total_count=0
 MAX_INPUT_TOKEN_LENGTH = int(os.getenv("MAX_INPUT_TOKEN_LENGTH", "128000"))
 DESCRIPTION = """\
@@ -20,114 +16,80 @@ This space demonstrates model [DeepSeek-R1](https://huggingface.co/deepseek-ai/d
 **You can also try our R1 model in [official homepage](https://r1.deepseek.com/chat).**
 """
 if not torch.cuda.is_available():
     DESCRIPTION += "\n<p>Running on CPU 🥶 This demo does not work on CPU.</p>"
 if torch.cuda.is_available():
     model_id = "deepseek-ai/deepseek-r1"
     model = AutoModelForCausalLM.from_pretrained(model_id, torch_dtype=torch.bfloat16, device_map="auto")
     tokenizer = AutoTokenizer.from_pretrained(model_id)
     tokenizer.use_default_system_prompt = False
 @spaces.GPU
-def generate(
-    message: str,
-    chat_history: list[tuple[str, str]],
-    system_prompt: str,
-    max_new_tokens: int = 2048,
-    temperature: float = 0,
-    top_p: float = 0,
-    top_k: int = 50,
-    repetition_penalty: float = 2,
-) -> Iterator[str]:
-    global total_count
-    total_count += 1
-    print(total_count)
-    os.system("nvidia-smi")
     conversation = []
     if system_prompt:
         conversation.append({"role": "system", "content": system_prompt})
     for user, assistant in chat_history:
-        conversation.extend([{"role": "user", "content": user}, {"role": "assistant", "content": assistant}])
     conversation.append({"role": "user", "content": message})
     input_ids = tokenizer.apply_chat_template(conversation, return_tensors="pt")
     if input_ids.shape[1] > MAX_INPUT_TOKEN_LENGTH:
         input_ids = input_ids[:, -MAX_INPUT_TOKEN_LENGTH:]
         gr.Warning(f"Trimmed input from conversation as it was longer than {MAX_INPUT_TOKEN_LENGTH} tokens.")
     input_ids = input_ids.to(model.device)
     streamer = TextIteratorStreamer(tokenizer, timeout=10.0, skip_prompt=True, skip_special_tokens=True)
-    generate_kwargs = dict(
-        {"input_ids": input_ids},
-        streamer=streamer,
-        max_new_tokens=max_new_tokens,
-        do_sample=False,
-        top_p=top_p,
-        top_k=top_k,
-        num_beams=1,
-        # temperature=temperature,
-        repetition_penalty=repetition_penalty,
-        eos_token_id=32021
-    )
     t = Thread(target=model.generate, kwargs=generate_kwargs)
     t.start()
     outputs = []
     for text in streamer:
         outputs.append(text)
-        yield "".join(outputs).replace("<|EOT|>","")
 chat_interface = gr.ChatInterface(
     fn=generate,
     additional_inputs=[
         gr.Textbox(label="System prompt", lines=6),
-        gr.Slider(
-            label="Max new tokens",
-            minimum=0,
-            maximum=MAX_MAX_NEW_TOKENS,
-            step=0.01,
-            value=DEFAULT_MAX_NEW_TOKENS,
-        ),
-        # gr.Slider(
-        #     label="Temperature",
-        #     minimum=0,
-        #     maximum=4.0,
-        #     step=0.01,
-        #     value=0,
-        # ),
-        gr.Slider(
-            label="Top-p (nucleus sampling)",
-            minimum=0,
-            maximum=1.0,
-            step=0.01,
-            value=0,
-        ),
-        gr.Slider(
-            label="Top-k",
-            minimum=1,
-            maximum=1000,
-            step=0.01,
-            value=50,
-        ),
-        gr.Slider(
-            label="Repetition penalty",
-            minimum=1.0,
-            maximum=2.0,
-            step=0.01,
-            value=2,
-        ),
     ],
     stop_btn=gr.Button("Stop"),
     examples=[
         ["implement snake game using pygame"],
         ["Can you explain briefly to me what is the Python programming language?"],
-        ["write a program to find the factorial of a number"],
     ],
 )

 import os
 from threading import Thread
 from typing import Iterator
 import gradio as gr
+import spaces, torch, requests
 from transformers import AutoModelForCausalLM, AutoTokenizer, TextIteratorStreamer
 MAX_MAX_NEW_TOKENS = 2048
 DEFAULT_MAX_NEW_TOKENS = 2048
 MAX_INPUT_TOKEN_LENGTH = int(os.getenv("MAX_INPUT_TOKEN_LENGTH", "128000"))
 DESCRIPTION = """\
 **You can also try our R1 model in [official homepage](https://r1.deepseek.com/chat).**
 """
 if not torch.cuda.is_available():
     DESCRIPTION += "\n<p>Running on CPU 🥶 This demo does not work on CPU.</p>"
 if torch.cuda.is_available():
     model_id = "deepseek-ai/deepseek-r1"
     model = AutoModelForCausalLM.from_pretrained(model_id, torch_dtype=torch.bfloat16, device_map="auto")
     tokenizer = AutoTokenizer.from_pretrained(model_id)
     tokenizer.use_default_system_prompt = False
 @spaces.GPU
 def generate(message: str,
              chat_history: list[tuple[str, str]],
              system_prompt: str,
              max_new_tokens: int = 2048,
              temperature: float = 0,
              top_p: float = 0,
              top_k: int = 50,
              repetition_penalty: float = 2,
              search_query: str = "") -> Iterator[str]:
     """Stream the model's reply to ``message``, yielding the text generated so far.

     Args:
         message: The new user message.
         chat_history: Earlier ``(user, assistant)`` turns, oldest first.
         system_prompt: Optional system message placed first in the conversation.
         max_new_tokens: Upper bound on generated tokens (coerced to ``int``).
         temperature: Accepted for interface compatibility only; it is not
             forwarded to ``model.generate`` because decoding is greedy
             (``do_sample=False``).
         top_p: Forwarded to ``model.generate``.
         top_k: Forwarded to ``model.generate`` (coerced to ``int``).
         repetition_penalty: Forwarded to ``model.generate``.
         search_query: Optional query sent to the DuckDuckGo Instant Answer
             endpoint; a non-empty ``AbstractText`` is added as a system message.

     Yields:
         The accumulated reply after each streamed chunk, with any literal
         ``<|EOT|>`` marker removed.
     """
     conversation = []
     if system_prompt:
         conversation.append({"role": "system", "content": system_prompt})
     query = (search_query or "").strip()
     if query:
         try:
             # Pass the query through ``params`` so requests URL-encodes it;
             # interpolating it into the URL breaks on '&', '#', spaces, etc.
             r = requests.get("https://api.duckduckgo.com/",
                              params={"q": query, "format": "json"},
                              timeout=5)
             r.raise_for_status()
             data = r.json()
         except (requests.RequestException, ValueError) as e:
             # Network/HTTP failures and non-JSON bodies are reported to the
             # model as context, as before; unrelated bugs are no longer hidden.
             conversation.append({"role": "system", "content": f"Search error: {e}"})
         else:
             result = data.get("AbstractText", "") if isinstance(data, dict) else ""
             if result:
                 conversation.append({"role": "system",
                                      "content": f"Search results for '{query}': {result}"})
     for user, assistant in chat_history:
         conversation.extend([{"role": "user", "content": user},
                              {"role": "assistant", "content": assistant}])
     conversation.append({"role": "user", "content": message})
     # NOTE(review): no add_generation_prompt is requested here; confirm the
     # model's chat template does not need it to open the assistant turn.
     input_ids = tokenizer.apply_chat_template(conversation, return_tensors="pt")
     if input_ids.shape[1] > MAX_INPUT_TOKEN_LENGTH:
         # Keep the most recent tokens when the prompt is too long.
         input_ids = input_ids[:, -MAX_INPUT_TOKEN_LENGTH:]
         gr.Warning(f"Trimmed input from conversation as it was longer than {MAX_INPUT_TOKEN_LENGTH} tokens.")
     input_ids = input_ids.to(model.device)
     streamer = TextIteratorStreamer(tokenizer, timeout=10.0, skip_prompt=True, skip_special_tokens=True)
     generate_kwargs = {
         "input_ids": input_ids,
         "streamer": streamer,
         # UI sliders can deliver floats; these two must be integers.
         "max_new_tokens": int(max_new_tokens),
         "do_sample": False,
         "top_p": top_p,
         "top_k": int(top_k),
         "num_beams": 1,
         "repetition_penalty": repetition_penalty,
         # NOTE(review): hard-coded token id; confirm it is the end-of-turn
         # token for the tokenizer loaded above.
         "eos_token_id": 32021,
     }
     # Generation runs in a worker thread so this generator can consume the
     # streamer and yield partial text while tokens are still being produced.
     t = Thread(target=model.generate, kwargs=generate_kwargs)
     t.start()
     partial = ""
     for text in streamer:
         partial += text
         yield partial.replace("<|EOT|>", "")
 def _chat_fn(message, chat_history, system_prompt, max_new_tokens, top_p, top_k,
              repetition_penalty, search_query):
     """Adapt the UI inputs to ``generate`` by keyword.

     Gradio passes ``additional_inputs`` positionally, in list order. The UI
     has no temperature control while ``generate`` still has a ``temperature``
     parameter, so passing ``generate`` directly shifts every value after
     "Max new tokens" by one slot (the search text would arrive as
     ``repetition_penalty``). Mapping by name keeps each widget on its
     intended parameter.
     """
     yield from generate(
         message,
         chat_history,
         system_prompt,
         max_new_tokens=max_new_tokens,
         top_p=top_p,
         top_k=top_k,
         repetition_penalty=repetition_penalty,
         search_query=search_query,
     )
 # Chat UI; the order of additional_inputs must match _chat_fn's parameters
 # after (message, chat_history).
 chat_interface = gr.ChatInterface(
     fn=_chat_fn,
     additional_inputs=[
         gr.Textbox(label="System prompt", lines=6),
         # Token counts are integers: step by 1 and require at least one token.
         gr.Slider(label="Max new tokens", minimum=1, maximum=MAX_MAX_NEW_TOKENS, step=1, value=DEFAULT_MAX_NEW_TOKENS),
         gr.Slider(label="Top-p (nucleus sampling)", minimum=0, maximum=1.0, step=0.01, value=0),
         gr.Slider(label="Top-k", minimum=1, maximum=1000, step=1, value=50),
         gr.Slider(label="Repetition penalty", minimum=1.0, maximum=2.0, step=0.01, value=2),
         gr.Textbox(label="Search Query (Optional)", placeholder="Enter search query to fetch online info", lines=1),
     ],
     stop_btn=gr.Button("Stop"),
     examples=[
         ["implement snake game using pygame"],
         ["Can you explain briefly to me what is the Python programming language?"],
         ["write a program to find the factorial of a number"],
     ],
 )

requirements.txt CHANGED Viewed

@@ -1,9 +1,9 @@
-accelerate==0.23.0
 bitsandbytes==0.41.1
-gradio==3.48.0
 protobuf==3.20.3
 scipy==1.11.2
 sentencepiece==0.1.99
 spaces==0.16.1
-torch==2.0.0
-transformers==4.34.0

+accelerate==0.23.2
 bitsandbytes==0.41.1
+gradio==3.50.1
 protobuf==3.20.3
 scipy==1.11.2
 sentencepiece==0.1.99
 spaces==0.16.1
+torch==2.0.1
+transformers==4.35.1