Spaces:

lightonai
/

LightOnOCR-1B-Demo

Running

App Files Community

staghado committed on Oct 21

Commit

111a99e

verified ·

1 Parent(s): cb4b97e

Update app.py

Browse files

Files changed (1) hide show

app.py +59 -29

app.py CHANGED Viewed

@@ -1,5 +1,14 @@
 import gradio as gr
-from huggingface_hub import InferenceClient
 def respond(
@@ -9,40 +18,57 @@ def respond(
     max_tokens,
     temperature,
     top_p,
-    hf_token: gr.OAuthToken,
 ):
     """
-    For more information on `huggingface_hub` Inference API support, please check the docs: https://huggingface.co/docs/huggingface_hub/v0.22.2/en/guides/inference
     """
-    client = InferenceClient(token=hf_token.token, model="openai/gpt-oss-20b")
     messages = [{"role": "system", "content": system_message}]
     messages.extend(history)
     messages.append({"role": "user", "content": message})
-    response = ""
-    for message in client.chat_completion(
-        messages,
-        max_tokens=max_tokens,
-        stream=True,
-        temperature=temperature,
-        top_p=top_p,
-    ):
-        choices = message.choices
-        token = ""
-        if len(choices) and choices[0].delta.content:
-            token = choices[0].delta.content
-        response += token
-        yield response
-"""
-For information on how to customize the ChatInterface, peruse the gradio docs: https://www.gradio.app/docs/chatinterface
-"""
 chatbot = gr.ChatInterface(
     respond,
     type="messages",
@@ -60,11 +86,15 @@ chatbot = gr.ChatInterface(
     ],
 )
-with gr.Blocks() as demo:
-    with gr.Sidebar():
-        gr.LoginButton()
     chatbot.render()
 if __name__ == "__main__":
-    demo.launch()

+#!/usr/bin/env python3
+import os
+import json
+import requests
 import gradio as gr
+ENDPOINT = os.getenv("VLLM_ENDPOINT")
+MODEL = os.getenv("VLLM_MODEL")
+if not ENDPOINT or not MODEL:
+    raise ValueError("VLLM_ENDPOINT and VLLM_MODEL environment variables must be set")
 def respond(
     max_tokens,
     temperature,
     top_p,
 ):
     """
+    Send messages to vLLM endpoint and stream the response.
     """
     messages = [{"role": "system", "content": system_message}]
     messages.extend(history)
     messages.append({"role": "user", "content": message})
+    payload = {
+        "model": MODEL,
+        "messages": messages,
+        "max_tokens": max_tokens,
+        "temperature": temperature,
+        "top_p": top_p,
+        "stream": True
+    }
+    try:
+        response = requests.post(
+            ENDPOINT,
+            headers={"Content-Type": "application/json"},
+            data=json.dumps(payload),
+            stream=True
+        )
+        response.raise_for_status()
+        accumulated_response = ""
+        for line in response.iter_lines():
+            if line:
+                line = line.decode('utf-8')
+                if line.startswith('data: '):
+                    line = line[6:]  # Remove 'data: ' prefix
+                if line.strip() == '[DONE]':
+                    break
+                try:
+                    chunk = json.loads(line)
+                    if 'choices' in chunk and len(chunk['choices']) > 0:
+                        delta = chunk['choices'][0].get('delta', {})
+                        content = delta.get('content', '')
+                        if content:
+                            accumulated_response += content
+                            yield accumulated_response
+                except json.JSONDecodeError:
+                    continue
+    except Exception as e:
+        yield f"Error: {str(e)}"
 chatbot = gr.ChatInterface(
     respond,
     type="messages",
     ],
 )
+with gr.Blocks(title="vLLM Chatbot") as demo:
+    gr.Markdown("# 💬 Chat Interface")
+    gr.Markdown("""
+    Configure the endpoint via environment variables:
+    - `VLLM_ENDPOINT`: vLLM server URL
+    - `VLLM_MODEL`: Model name
+    """)
     chatbot.render()
 if __name__ == "__main__":
+    demo.launch()