import gradio as gr
import json
import torch
from transformers import AutoModelForCausalLM, AutoTokenizer
import spaces
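# Note on ZeroGPU ("Running on Zero") Spaces: a GPU is attached only while a
# function decorated with @spaces.GPU is executing; at import time the process
# is CPU-only. This is why text_to_json below carries the decorator (the usual
# ZeroGPU pattern; @spaces.GPU also accepts a duration argument if generation
# needs more time than the default allowance).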
# Model configuration
MODEL_NAME = "osmosis-ai/Osmosis-Structure-0.6B"

# Global variables to store the model and tokenizer
model = None
tokenizer = None
def load_model():
    """Load the Osmosis Structure model and tokenizer."""
    global model, tokenizer
    try:
        print("Loading Osmosis Structure model...")

        # Load tokenizer
        tokenizer = AutoTokenizer.from_pretrained(
            MODEL_NAME,
            trust_remote_code=True
        )

        # Load model
        model = AutoModelForCausalLM.from_pretrained(
            MODEL_NAME,
            torch_dtype=torch.float16 if torch.cuda.is_available() else torch.float32,
            device_map="auto" if torch.cuda.is_available() else None,
            trust_remote_code=True
        )

        print("✅ Osmosis Structure model loaded successfully!")
        return True
    except Exception as e:
        print(f"❌ Error loading model: {e}")
        return False
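# On ZeroGPU hardware torch.cuda.is_available() is typically False at startup,
# so the branches above load the model on CPU in float32; it is moved to the
# GPU once a @spaces.GPU-decorated call runs. (An assumption about the Zero
# runtime; on dedicated GPU Spaces, device_map="auto" places it immediately.)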
@spaces.GPU  # Request a GPU for the duration of this call (required on ZeroGPU Spaces)
def text_to_json(input_text, max_tokens=512, temperature=0.6, top_p=0.95, top_k=20):
    """Convert plain text to structured JSON using the Osmosis Structure model."""
    global model, tokenizer

    if model is None or tokenizer is None:
        return "❌ Model not loaded. Please wait for model initialization."
    try:
        # Create a structured prompt for JSON conversion
        messages = [
            {
                "role": "system",
                "content": "You are a helpful assistant that converts unstructured text into well-formatted JSON. Extract key information and organize it into a logical, structured format. Always respond with valid JSON."
            },
            {
                "role": "user",
                "content": f"Convert this text to JSON format:\n\n{input_text}"
            }
        ]

        # Apply chat template
        formatted_prompt = tokenizer.apply_chat_template(
            messages,
            tokenize=False,
            add_generation_prompt=True
        )

        # Tokenize the input
        inputs = tokenizer(
            formatted_prompt,
            return_tensors="pt",
            truncation=True,
            max_length=2048
        )
        # Move inputs to the model's device (works for both CPU and GPU)
        inputs = {k: v.to(model.device) for k, v in inputs.items()}
        # Generation parameters based on the model config
        generation_config = {
            "max_new_tokens": max_tokens,
            "temperature": temperature,
            "top_p": top_p,
            "top_k": top_k,
            "do_sample": True,
            # Fall back to the EOS token when the tokenizer defines no pad token
            "pad_token_id": tokenizer.pad_token_id if tokenizer.pad_token_id is not None else tokenizer.eos_token_id,
            "eos_token_id": tokenizer.eos_token_id,
            "repetition_penalty": 1.1,
        }
        # Generate response
        with torch.no_grad():
            outputs = model.generate(
                **inputs,
                **generation_config
            )

        # Decode the response (only the newly generated tokens, skipping the prompt)
        generated_tokens = outputs[0][len(inputs["input_ids"][0]):]
        generated_text = tokenizer.decode(generated_tokens, skip_special_tokens=True)

        # Clean up the response
        generated_text = generated_text.strip()
        # Try to extract JSON from the response
        json_start = generated_text.find('{')
        json_end = generated_text.rfind('}')

        if json_start != -1 and json_end != -1 and json_end > json_start:
            json_text = generated_text[json_start:json_end + 1]
        else:
            # If no clear JSON boundaries, try to clean the whole response
            json_text = generated_text

            # Remove common prefixes
            prefixes_to_remove = ["```json", "```", "Here's the JSON:", "JSON:"]
            for prefix in prefixes_to_remove:
                if json_text.startswith(prefix):
                    json_text = json_text[len(prefix):].strip()

            # Remove common suffixes
            suffixes_to_remove = ["```", "\n```"]
            for suffix in suffixes_to_remove:
                if json_text.endswith(suffix):
                    json_text = json_text[:-len(suffix)].strip()
        # Validate and format JSON
        try:
            parsed_json = json.loads(json_text)
            return json.dumps(parsed_json, indent=2, ensure_ascii=False)
        except json.JSONDecodeError:
            # If still not valid JSON, return the cleaned text with a note
            return f"Generated response (may need manual cleanup):\n\n{json_text}"

    except Exception as e:
        return f"❌ Error generating JSON: {str(e)}"
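# A minimal local smoke test (a sketch for development machines, not used by
# the Space itself; the example sentence is made up):
def _smoke_test():
    if load_model():
        print(text_to_json("Jane Doe, 28, is a data analyst at Acme Corp in Boston."))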
# Create Gradio interface
def create_demo():
    with gr.Blocks(
        title="Osmosis Structure - Text to JSON Converter",
        theme=gr.themes.Soft()
    ) as demo:
        gr.Markdown("""
        # Osmosis Structure - Text to JSON Converter

        Convert unstructured text into well-formatted JSON using the Osmosis Structure 0.6B model.
        This model is specifically trained for structured data extraction and format conversion.
        """)

        gr.Markdown("""
        ### ℹ️ About Osmosis Structure

        - **Model**: Osmosis Structure (0.6B parameters)
        - **Architecture**: Qwen3 (specialized for structured data)
        - **Purpose**: Converting unstructured text to structured JSON
        - **Optimizations**: Fine-tuned for data extraction and format conversion tasks

        The model automatically identifies key information in your text and organizes it into logical JSON structures.
        """)
        with gr.Row():
            with gr.Column(scale=1):
                input_text = gr.Textbox(
                    label="Input Text",
                    placeholder="Enter your unstructured text here...\n\nExample: 'John Smith is a 30-year-old software engineer from New York. He works at Tech Corp and has 5 years of experience in Python development.'",
                    lines=8,
                    max_lines=15
                )

                with gr.Accordion("⚙️ Generation Settings", open=False):
                    max_tokens = gr.Slider(
                        minimum=50,
                        maximum=1000,
                        value=512,
                        step=10,
                        label="Max Tokens",
                        info="Maximum number of tokens to generate"
                    )
                    temperature = gr.Slider(
                        minimum=0.1,
                        maximum=1.0,
                        value=0.6,
                        step=0.1,
                        label="Temperature",
                        info="Controls randomness (lower = more focused)"
                    )
                    top_p = gr.Slider(
                        minimum=0.1,
                        maximum=1.0,
                        value=0.95,
                        step=0.05,
                        label="Top-p",
                        info="Nucleus sampling parameter"
                    )
                    top_k = gr.Slider(
                        minimum=1,
                        maximum=100,
                        value=20,
                        step=1,
                        label="Top-k",
                        info="Limits vocabulary for generation"
                    )

                convert_btn = gr.Button(
                    "Convert to JSON",
                    variant="primary",
                    size="lg"
                )

            with gr.Column(scale=1):
                output_json = gr.Textbox(
                    label="Generated JSON",
                    lines=15,
                    max_lines=20,
                    interactive=False,
                    show_copy_button=True
                )
        # Example inputs
        gr.Markdown("### Example Inputs")
        gr.Examples(
            examples=[
                ["John Smith is a 30-year-old software engineer from New York. He works at Tech Corp and has 5 years of experience in Python development. His email is john.smith@email.com and he graduated from MIT in 2018."],
                ["Order #12345 was placed on March 15, 2024. Customer: Sarah Johnson, Address: 123 Main St, Boston MA 02101. Items: 2x Laptop ($999 each), 1x Mouse ($25). Total: $2023. Status: Shipped via FedEx, tracking: 1234567890."],
                ["The conference will be held on June 10-12, 2024 at the Grand Hotel in San Francisco. Registration fee is $500 for early bird (before May 1) and $650 for regular registration. Contact info@conference.com for questions."],
                ["Product: Wireless Headphones Model XYZ-100. Price: $199.99. Features: Bluetooth 5.0, 30-hour battery, noise cancellation, wireless charging case. Colors available: Black, White, Blue. Warranty: 2 years. Rating: 4.5/5 stars (324 reviews)."]
            ],
            inputs=input_text,
            label="Click on any example to try it"
        )
        # Event handlers
        convert_btn.click(
            fn=text_to_json,
            inputs=[input_text, max_tokens, temperature, top_p, top_k],
            outputs=output_json,
            show_progress="full"
        )

        # Allow Enter key to trigger conversion
        input_text.submit(
            fn=text_to_json,
            inputs=[input_text, max_tokens, temperature, top_p, top_k],
            outputs=output_json,
            show_progress="full"
        )

    return demo
# Initialize the demo
if __name__ == "__main__":
    print("Initializing Osmosis Structure Demo...")

    # Load model at startup
    if load_model():
        print("Creating Gradio interface...")
        demo = create_demo()
        # show_tips/enable_queue were removed from launch() in Gradio 4+;
        # queueing is now enabled via demo.queue(). share=True is ignored on
        # Hugging Face Spaces, so it is omitted here.
        demo.queue()
        demo.launch(
            show_error=True,
            ssr_mode=False,
            mcp_server=True
        )
    else:
        print("❌ Failed to load model. Please check your setup.")
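# Assumed dependencies for this Space (a sketch of requirements.txt; the pins
# are guesses, not taken from the original repo):
#   gradio[mcp]>=5.0   # mcp_server=True and ssr_mode require a recent Gradio
#   transformers
#   torch
#   accelerate         # for device_map="auto"
#   spaces             # ZeroGPU support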