# Source: Hugging Face Space file "app.py" (commit a608f20, by staghado).
#!/usr/bin/env python3
import os
import json
import requests
import gradio as gr
# --- Configuration from environment ----------------------------------------
# os.getenv IS os.environ.get, so the original `get(...) or getenv(...)`
# fallback was redundant; a single lookup is equivalent.
ENDPOINT = os.environ.get("VLLM_ENDPOINT")  # vLLM chat-completions URL
MODEL = os.environ.get("VLLM_MODEL")        # model name to request

# Startup diagnostics: report presence (not values) of required settings.
print(f"Debug - Found ENDPOINT: {ENDPOINT is not None}")
print(f"Debug - Found MODEL: {MODEL is not None}")
# NOTE(review): dumping every env-var name can leak deployment details into
# logs; useful while debugging, consider removing in production.
print(f"Debug - All env vars: {list(os.environ.keys())}")

if not ENDPOINT or not MODEL:
    raise ValueError("VLLM_ENDPOINT and VLLM_MODEL environment variables must be set")
def respond(
    message,
    history: list[dict[str, str]],
    system_message,
    max_tokens,
    temperature,
    top_p,
):
    """
    Stream a chat completion from the vLLM endpoint.

    Args:
        message: Latest user message text.
        history: Prior turns as OpenAI-style ``{"role", "content"}`` dicts.
        system_message: System prompt prepended to the conversation.
        max_tokens: Cap on generated tokens.
        temperature: Sampling temperature.
        top_p: Nucleus-sampling cutoff.

    Yields:
        The accumulated assistant reply after each streamed chunk, so the
        chat UI re-renders the growing message.
    """
    messages = [{"role": "system", "content": system_message}]
    messages.extend(history)
    messages.append({"role": "user", "content": message})

    payload = {
        "model": MODEL,
        "messages": messages,
        "max_tokens": max_tokens,
        "temperature": temperature,
        "top_p": top_p,
        "stream": True,
    }

    try:
        # `json=` serializes and sets the Content-Type header in one step.
        # A timeout keeps a dead server from hanging the UI forever:
        # 10 s to connect, 300 s of silence between streamed chunks.
        # The `with` block closes the streamed connection on every exit path
        # (the original leaked it).
        with requests.post(
            ENDPOINT,
            json=payload,
            stream=True,
            timeout=(10, 300),
        ) as response:
            response.raise_for_status()

            accumulated = ""
            # Server-Sent Events framing: payload lines look like
            # "data: <json>", terminated by a literal "data: [DONE]".
            for raw in response.iter_lines():
                if not raw:
                    continue
                line = raw.decode("utf-8")
                if not line.startswith("data: "):
                    continue
                line = line[6:]  # strip the 'data: ' prefix
                if line.strip() == "[DONE]":
                    break
                try:
                    chunk = json.loads(line)
                except json.JSONDecodeError:
                    continue  # skip keep-alive / malformed lines
                choices = chunk.get("choices") or []
                if choices:
                    content = choices[0].get("delta", {}).get("content", "")
                    if content:
                        accumulated += content
                        yield accumulated
    except Exception as e:
        # Surface the failure in the chat window rather than crashing the UI.
        yield f"Error: {str(e)}"
"""
For information on how to customize the ChatInterface, peruse the gradio docs: https://www.gradio.app/docs/chatinterface
"""
chatbot = gr.ChatInterface(
respond,
type="messages",
additional_inputs=[
gr.Textbox(value="You are a friendly Chatbot.", label="System message"),
gr.Slider(minimum=1, maximum=2048, value=512, step=1, label="Max new tokens"),
gr.Slider(minimum=0.1, maximum=4.0, value=0.7, step=0.1, label="Temperature"),
gr.Slider(
minimum=0.1,
maximum=1.0,
value=0.95,
step=0.05,
label="Top-p (nucleus sampling)",
),
],
)
with gr.Blocks(title="vLLM Chatbot") as demo:
gr.Markdown("# 💬 Chat Interface")
gr.Markdown("""
Configure the endpoint via environment variables:
- `VLLM_ENDPOINT`: vLLM server URL
- `VLLM_MODEL`: Model name
""")
chatbot.render()
if __name__ == "__main__":
demo.launch()