Spaces:
Runtime error
Runtime error
Updated
Browse files- .gitattributes +2 -0
- README.md +17 -12
- app.py +69 -39
- config.py +68 -0
- download_model.py +106 -0
- install_verify.py +123 -0
- setup_and_run.py +108 -0
.gitattributes
CHANGED
|
@@ -33,3 +33,5 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
|
|
| 33 |
*.zip filter=lfs diff=lfs merge=lfs -text
|
| 34 |
*.zst filter=lfs diff=lfs merge=lfs -text
|
| 35 |
*tfevents* filter=lfs diff=lfs merge=lfs -text
|
|
|
|
|
|
|
|
|
| 33 |
*.zip filter=lfs diff=lfs merge=lfs -text
|
| 34 |
*.zst filter=lfs diff=lfs merge=lfs -text
|
| 35 |
*tfevents* filter=lfs diff=lfs merge=lfs -text
|
| 36 |
+
*.gguf filter=lfs diff=lfs merge=lfs -text
|
| 37 |
+
*.ggml filter=lfs diff=lfs merge=lfs -text
|
README.md
CHANGED
|
@@ -12,15 +12,16 @@ short_description: Plain text to json using llama.cpp
|
|
| 12 |
|
| 13 |
# Plain Text to JSON with llama.cpp
|
| 14 |
|
| 15 |
-
This Hugging Face Space converts plain text into structured JSON format using llama.cpp for efficient CPU inference.
|
| 16 |
|
| 17 |
## Features
|
| 18 |
|
| 19 |
-
- **llama.cpp Integration**: Uses llama-cpp-python for efficient model inference
|
|
|
|
| 20 |
- **Gradio Interface**: User-friendly web interface
|
| 21 |
-
- **JSON Conversion**: Converts unstructured text to
|
| 22 |
-
- **
|
| 23 |
-
- **Demo Mode**: Basic functionality without requiring
|
| 24 |
|
| 25 |
## Setup
|
| 26 |
|
|
@@ -28,18 +29,22 @@ The space automatically installs:
|
|
| 28 |
- `llama-cpp-python` for llama.cpp integration
|
| 29 |
- Required build tools (`build-essential`, `cmake`)
|
| 30 |
- Gradio and other dependencies
|
|
|
|
| 31 |
|
| 32 |
## Usage
|
| 33 |
|
| 34 |
-
1. **
|
| 35 |
-
2. **
|
| 36 |
-
3. **
|
|
|
|
| 37 |
|
| 38 |
-
## Model
|
| 39 |
|
| 40 |
-
-
|
| 41 |
-
-
|
| 42 |
-
-
|
|
|
|
|
|
|
| 43 |
|
| 44 |
## Configuration
|
| 45 |
|
|
|
|
| 12 |
|
| 13 |
# Plain Text to JSON with llama.cpp
|
| 14 |
|
| 15 |
+
This Hugging Face Space converts plain text into structured JSON format using llama.cpp for efficient CPU inference, powered by the Osmosis Structure 0.6B model.
|
| 16 |
|
| 17 |
## Features
|
| 18 |
|
| 19 |
+
- **llama.cpp Integration**: Uses llama-cpp-python for efficient CPU model inference
|
| 20 |
+
- **Osmosis Structure Model**: Specialized 0.6B parameter model for structured data extraction
|
| 21 |
- **Gradio Interface**: User-friendly web interface
|
| 22 |
+
- **JSON Conversion**: Converts unstructured text to well-formatted JSON
|
| 23 |
+
- **Auto-Download**: Automatically downloads the Osmosis model on first use
|
| 24 |
+
- **Demo Mode**: Basic functionality without requiring the AI model
|
| 25 |
|
| 26 |
## Setup
|
| 27 |
|
|
|
|
| 29 |
- `llama-cpp-python` for llama.cpp integration
|
| 30 |
- Required build tools (`build-essential`, `cmake`)
|
| 31 |
- Gradio and other dependencies
|
| 32 |
+
- Downloads Osmosis Structure 0.6B model (~1.2GB) on first use
|
| 33 |
|
| 34 |
## Usage
|
| 35 |
|
| 36 |
+
1. **Quick Start**: Run `python setup_and_run.py` for automated setup
|
| 37 |
+
2. **Demo Mode**: Use "Demo (No Model)" for basic text-to-JSON conversion
|
| 38 |
+
3. **Full Mode**: Click "Load Model" to download and use the Osmosis model
|
| 39 |
+
4. **Customize**: Adjust temperature and max_tokens for different output styles
|
| 40 |
|
| 41 |
+
## Model Details
|
| 42 |
|
| 43 |
+
- **Model**: Osmosis Structure 0.6B BF16 GGUF
|
| 44 |
+
- **Repository**: https://huggingface.co/osmosis-ai/Osmosis-Structure-0.6B
|
| 45 |
+
- **Specialization**: Structure extraction and JSON generation
|
| 46 |
+
- **Size**: ~1.2GB download
|
| 47 |
+
- **Format**: GGUF (optimized for llama.cpp)
|
| 48 |
|
| 49 |
## Configuration
|
| 50 |
|
app.py
CHANGED
|
@@ -3,6 +3,7 @@ import json
|
|
| 3 |
from llama_cpp import Llama
|
| 4 |
import os
|
| 5 |
from huggingface_hub import hf_hub_download
|
|
|
|
| 6 |
|
| 7 |
# Global variable to store the model
|
| 8 |
llm = None
|
|
@@ -11,66 +12,94 @@ def load_model():
|
|
| 11 |
"""Load the llama.cpp model"""
|
| 12 |
global llm
|
| 13 |
try:
|
| 14 |
-
|
| 15 |
-
|
| 16 |
-
|
| 17 |
-
|
| 18 |
-
|
| 19 |
-
|
| 20 |
-
|
| 21 |
-
|
| 22 |
-
|
| 23 |
-
#
|
| 24 |
-
|
| 25 |
-
|
| 26 |
-
|
| 27 |
-
|
| 28 |
-
|
| 29 |
-
|
| 30 |
-
|
| 31 |
-
|
| 32 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 33 |
|
| 34 |
except Exception as e:
|
| 35 |
-
|
| 36 |
-
|
|
|
|
| 37 |
|
| 38 |
def text_to_json(input_text, max_tokens=512, temperature=0.7):
|
| 39 |
"""Convert plain text to structured JSON using llama.cpp"""
|
| 40 |
global llm
|
| 41 |
|
| 42 |
if llm is None:
|
| 43 |
-
return
|
| 44 |
|
| 45 |
try:
|
| 46 |
-
# Create a prompt for
|
| 47 |
-
prompt = f"""
|
|
|
|
|
|
|
|
|
|
|
|
|
| 48 |
|
| 49 |
-
|
| 50 |
|
| 51 |
-
|
|
|
|
| 52 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 53 |
# Generate response using llama.cpp
|
| 54 |
response = llm(
|
| 55 |
prompt,
|
| 56 |
-
|
| 57 |
-
temperature=temperature,
|
| 58 |
-
stop=["```", "\n\n\n"],
|
| 59 |
echo=False
|
| 60 |
)
|
| 61 |
|
| 62 |
generated_text = response['choices'][0]['text'].strip()
|
| 63 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 64 |
# Try to parse as JSON to validate
|
| 65 |
try:
|
| 66 |
parsed_json = json.loads(generated_text)
|
| 67 |
return json.dumps(parsed_json, indent=2)
|
| 68 |
except json.JSONDecodeError:
|
| 69 |
-
# If not valid JSON, return as
|
| 70 |
-
return generated_text
|
| 71 |
|
| 72 |
except Exception as e:
|
| 73 |
-
return f"Error generating JSON: {str(e)}"
|
| 74 |
|
| 75 |
def demo_without_model(input_text):
|
| 76 |
"""Demo function that works without loading a model"""
|
|
@@ -99,7 +128,7 @@ def demo_without_model(input_text):
|
|
| 99 |
# Create Gradio interface
|
| 100 |
with gr.Blocks(title="Plain Text to JSON with llama.cpp") as demo:
|
| 101 |
gr.Markdown("# Plain Text to JSON Converter")
|
| 102 |
-
gr.Markdown("Convert plain text into structured JSON format using llama.cpp")
|
| 103 |
|
| 104 |
with gr.Tab("Text to JSON"):
|
| 105 |
with gr.Row():
|
|
@@ -144,14 +173,15 @@ with gr.Blocks(title="Plain Text to JSON with llama.cpp") as demo:
|
|
| 144 |
|
| 145 |
gr.Markdown("""
|
| 146 |
### Instructions:
|
| 147 |
-
1. Click "Load Model" to initialize
|
| 148 |
-
2. Use "Demo (No Model)" for basic functionality without loading
|
| 149 |
-
3.
|
| 150 |
|
| 151 |
### Notes:
|
| 152 |
-
-
|
| 153 |
-
-
|
| 154 |
-
-
|
|
|
|
| 155 |
""")
|
| 156 |
|
| 157 |
# Event handlers
|
|
|
|
| 3 |
from llama_cpp import Llama
|
| 4 |
import os
|
| 5 |
from huggingface_hub import hf_hub_download
|
| 6 |
+
from config import get_model_config, get_generation_config, get_recommended_model
|
| 7 |
|
| 8 |
# Global variable to store the model
|
| 9 |
llm = None
|
|
|
|
| 12 |
"""Load the llama.cpp model"""
|
| 13 |
global llm
|
| 14 |
try:
|
| 15 |
+
print("Loading Osmosis Structure model...")
|
| 16 |
+
|
| 17 |
+
# Get model info and config
|
| 18 |
+
model_info = get_recommended_model()
|
| 19 |
+
model_config = get_model_config()
|
| 20 |
+
|
| 21 |
+
# Create models directory
|
| 22 |
+
os.makedirs("./models", exist_ok=True)
|
| 23 |
+
|
| 24 |
+
# Download the Osmosis model
|
| 25 |
+
print(f"Downloading {model_info['name']} ({model_info['size']})...")
|
| 26 |
+
model_path = hf_hub_download(
|
| 27 |
+
repo_id=model_info['repo_id'],
|
| 28 |
+
filename=model_info['filename'],
|
| 29 |
+
cache_dir="./models",
|
| 30 |
+
resume_download=True
|
| 31 |
+
)
|
| 32 |
+
|
| 33 |
+
print(f"Model downloaded to: {model_path}")
|
| 34 |
+
print("Initializing llama.cpp...")
|
| 35 |
+
|
| 36 |
+
# Initialize llama.cpp with the downloaded model
|
| 37 |
+
llm = Llama(
|
| 38 |
+
model_path=model_path,
|
| 39 |
+
**model_config
|
| 40 |
+
)
|
| 41 |
+
|
| 42 |
+
print("β
Osmosis Structure model loaded successfully!")
|
| 43 |
+
return f"β
Model loaded: {model_info['name']}\nPath: {model_path}\nDescription: {model_info['description']}"
|
| 44 |
|
| 45 |
except Exception as e:
|
| 46 |
+
error_msg = f"β Error loading model: {e}"
|
| 47 |
+
print(error_msg)
|
| 48 |
+
return error_msg
|
| 49 |
|
| 50 |
def text_to_json(input_text, max_tokens=512, temperature=0.7):
    """Convert plain text to structured JSON using llama.cpp.

    Args:
        input_text: Unstructured text to convert.
        max_tokens: Generation length cap passed to the model.
        temperature: Sampling temperature passed to the model.

    Returns:
        Pretty-printed JSON on success, or a diagnostic string when the
        model is not loaded, the output is not valid JSON, or generation fails.
    """
    global llm

    if llm is None:
        return "❌ Model not loaded. Please load the model first."

    try:
        # Structured chat-style prompt; trailing ```json nudges the model
        # to emit a fenced JSON block we can parse.
        prompt = f"""<|system|>
You are a helpful assistant that converts unstructured text into well-formatted JSON. Extract key information and organize it into a logical structure.

<|user|>
Convert this text to JSON format:

{input_text}

<|assistant|>
```json"""

        # Start from the shared generation defaults, then apply user settings
        gen_config = get_generation_config()
        gen_config.update({
            "max_tokens": max_tokens,
            "temperature": temperature
        })

        # Generate response using llama.cpp
        response = llm(
            prompt,
            **gen_config,
            echo=False
        )

        generated_text = response['choices'][0]['text'].strip()

        # Strip markdown code-fence remnants if the model echoed them
        if generated_text.startswith('```json'):
            generated_text = generated_text[7:]
        if generated_text.endswith('```'):
            generated_text = generated_text[:-3]
        generated_text = generated_text.strip()

        # Validate by round-tripping through the JSON parser
        try:
            parsed_json = json.loads(generated_text)
            return json.dumps(parsed_json, indent=2)
        except json.JSONDecodeError:
            # Not valid JSON — hand it back with a warning prefix
            return f"Generated (may need cleanup):\n{generated_text}"

    except Exception as e:
        return f"❌ Error generating JSON: {str(e)}"
| 103 |
|
| 104 |
def demo_without_model(input_text):
|
| 105 |
"""Demo function that works without loading a model"""
|
|
|
|
| 128 |
# Create Gradio interface
|
| 129 |
with gr.Blocks(title="Plain Text to JSON with llama.cpp") as demo:
|
| 130 |
gr.Markdown("# Plain Text to JSON Converter")
|
| 131 |
+
gr.Markdown("Convert plain text into structured JSON format using llama.cpp and Osmosis Structure model")
|
| 132 |
|
| 133 |
with gr.Tab("Text to JSON"):
|
| 134 |
with gr.Row():
|
|
|
|
| 173 |
|
| 174 |
gr.Markdown("""
|
| 175 |
### Instructions:
|
| 176 |
+
1. Click "Load Model" to download and initialize the Osmosis Structure model
|
| 177 |
+
2. Use "Demo (No Model)" for basic functionality without loading the AI model
|
| 178 |
+
3. The Osmosis model is optimized for structured data extraction and JSON generation
|
| 179 |
|
| 180 |
### Notes:
|
| 181 |
+
- Uses llama.cpp for efficient CPU inference
|
| 182 |
+
- Osmosis Structure 0.6B model (~1.2GB) will be downloaded automatically
|
| 183 |
+
- Model is specialized for converting unstructured text to structured formats
|
| 184 |
+
- Adjust max_tokens and temperature for different output styles
|
| 185 |
""")
|
| 186 |
|
| 187 |
# Event handlers
|
config.py
CHANGED
|
@@ -0,0 +1,68 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
"""
|
| 2 |
+
Configuration settings for llama.cpp in Hugging Face Space
|
| 3 |
+
"""
|
| 4 |
+
|
| 5 |
+
import os
|
| 6 |
+
|
| 7 |
+
# Model configuration
|
| 8 |
+
MODEL_CONFIG = {
|
| 9 |
+
"n_ctx": 2048, # Context window size
|
| 10 |
+
"n_threads": 2, # Number of threads (conservative for HF Spaces)
|
| 11 |
+
"n_batch": 8, # Batch size for prompt processing
|
| 12 |
+
"use_mmap": True, # Use memory mapping for model files
|
| 13 |
+
"use_mlock": False, # Don't lock model in memory (saves RAM)
|
| 14 |
+
"verbose": False, # Reduce logging in production
|
| 15 |
+
}
|
| 16 |
+
|
| 17 |
+
# Generation defaults
|
| 18 |
+
GENERATION_CONFIG = {
|
| 19 |
+
"temperature": 0.7,
|
| 20 |
+
"top_p": 0.9,
|
| 21 |
+
"top_k": 40,
|
| 22 |
+
"repeat_penalty": 1.1,
|
| 23 |
+
"stop": ["```", "\n\n\n", "Human:", "Assistant:"],
|
| 24 |
+
}
|
| 25 |
+
|
| 26 |
+
# Hugging Face Space specific settings
|
| 27 |
+
HF_SPACE_CONFIG = {
|
| 28 |
+
"max_memory_usage": "2GB", # Conservative memory usage
|
| 29 |
+
"timeout_seconds": 30, # Request timeout
|
| 30 |
+
"enable_cpu_only": True, # Force CPU inference
|
| 31 |
+
}
|
| 32 |
+
|
| 33 |
+
# Model download settings
|
| 34 |
+
MODEL_DOWNLOAD_CONFIG = {
|
| 35 |
+
"cache_dir": "./models",
|
| 36 |
+
"use_auth_token": os.getenv("HF_TOKEN", None),
|
| 37 |
+
"resume_download": True,
|
| 38 |
+
}
|
| 39 |
+
|
| 40 |
+
# Recommended small GGUF models for demonstration
|
| 41 |
+
RECOMMENDED_MODELS = [
|
| 42 |
+
{
|
| 43 |
+
"name": "Osmosis-Structure-0.6B",
|
| 44 |
+
"repo_id": "osmosis-ai/Osmosis-Structure-0.6B",
|
| 45 |
+
"filename": "Osmosis-Structure-0.6B-BF16.gguf",
|
| 46 |
+
"size": "~1.2GB",
|
| 47 |
+
"description": "Osmosis AI structure-focused model for JSON generation"
|
| 48 |
+
},
|
| 49 |
+
{
|
| 50 |
+
"name": "TinyLlama-1.1B-Chat-v1.0-GGUF",
|
| 51 |
+
"repo_id": "TheBloke/TinyLlama-1.1B-Chat-v1.0-GGUF",
|
| 52 |
+
"filename": "tinyllama-1.1b-chat-v1.0.q4_k_m.gguf",
|
| 53 |
+
"size": "~700MB",
|
| 54 |
+
"description": "Small, fast model good for testing"
|
| 55 |
+
}
|
| 56 |
+
]
|
| 57 |
+
|
| 58 |
+
def get_model_config():
|
| 59 |
+
"""Get model configuration optimized for HF Spaces"""
|
| 60 |
+
return MODEL_CONFIG.copy()
|
| 61 |
+
|
| 62 |
+
def get_generation_config():
|
| 63 |
+
"""Get generation configuration"""
|
| 64 |
+
return GENERATION_CONFIG.copy()
|
| 65 |
+
|
| 66 |
+
def get_recommended_model():
|
| 67 |
+
"""Get the recommended model for this space"""
|
| 68 |
+
return RECOMMENDED_MODELS[0] # Return TinyLlama as default
|
download_model.py
ADDED
|
@@ -0,0 +1,106 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
#!/usr/bin/env python3
|
| 2 |
+
"""
|
| 3 |
+
Download a sample GGUF model for testing llama.cpp integration
|
| 4 |
+
"""
|
| 5 |
+
|
| 6 |
+
import os
|
| 7 |
+
from huggingface_hub import hf_hub_download
|
| 8 |
+
from config import get_recommended_model, MODEL_DOWNLOAD_CONFIG
|
| 9 |
+
|
| 10 |
+
def download_sample_model():
    """Download the recommended small GGUF model for testing.

    Returns:
        The local filesystem path of the downloaded model, or None when
        the download failed (the error is printed, not raised).
    """
    model_info = get_recommended_model()

    print(f"π₯ Downloading {model_info['name']}...")
    print(f"   Repository: {model_info['repo_id']}")
    print(f"   File: {model_info['filename']}")
    print(f"   Size: {model_info['size']}")
    print(f"   Description: {model_info['description']}")

    try:
        # Create models directory if it doesn't exist
        os.makedirs(MODEL_DOWNLOAD_CONFIG['cache_dir'], exist_ok=True)

        # Download the model. `resume_download` is deprecated in recent
        # huggingface_hub releases (resuming is the default), so it is
        # no longer forwarded.
        model_path = hf_hub_download(
            repo_id=model_info['repo_id'],
            filename=model_info['filename'],
            cache_dir=MODEL_DOWNLOAD_CONFIG['cache_dir'],
            token=MODEL_DOWNLOAD_CONFIG['use_auth_token']
        )

        print("✅ Model downloaded successfully!")
        print(f"   Path: {model_path}")

        # Create a symlink in the models directory for easy access
        symlink_path = os.path.join(MODEL_DOWNLOAD_CONFIG['cache_dir'], "model.gguf")
        if os.path.exists(symlink_path):
            os.remove(symlink_path)

        try:
            os.symlink(model_path, symlink_path)
            print(f"   Symlink created: {symlink_path}")
        except OSError:
            # Symlinks might not work on all systems, just report the path
            print(f"   Use this path in your code: {model_path}")

        return model_path

    except Exception as e:
        print(f"❌ Error downloading model: {e}")
        print("π‘ You can manually download a GGUF model and place it in ./models/")
        return None
| 55 |
+
def list_available_models():
    """Return metadata dicts for every GGUF/GGML file in the models directory.

    Each entry has keys 'name', 'path' and 'size_mb'. Also prints a short
    human-readable listing as a side effect.
    """
    models_dir = MODEL_DOWNLOAD_CONFIG['cache_dir']

    if not os.path.exists(models_dir):
        print(f"π Models directory doesn't exist: {models_dir}")
        return []

    discovered = []
    for entry in os.listdir(models_dir):
        if entry.endswith(('.gguf', '.ggml')):
            full_path = os.path.join(models_dir, entry)
            discovered.append({
                'name': entry,
                'path': full_path,
                'size_mb': os.path.getsize(full_path) / (1024 * 1024),
            })

    if not discovered:
        print("π No GGUF/GGML models found in models directory")
    else:
        print("π Available models:")
        for info in discovered:
            print(f"   - {info['name']} ({info['size_mb']:.1f} MB)")

    return discovered
|
| 83 |
+
if __name__ == "__main__":
|
| 84 |
+
print("π€ Model Download Utility for llama.cpp")
|
| 85 |
+
print("=" * 50)
|
| 86 |
+
|
| 87 |
+
# List existing models
|
| 88 |
+
print("\nπ Checking for existing models...")
|
| 89 |
+
existing_models = list_available_models()
|
| 90 |
+
|
| 91 |
+
if not existing_models:
|
| 92 |
+
print("\nπ₯ No models found. Downloading sample model...")
|
| 93 |
+
download_sample_model()
|
| 94 |
+
else:
|
| 95 |
+
print(f"\nβ
Found {len(existing_models)} existing model(s)")
|
| 96 |
+
|
| 97 |
+
# Ask if user wants to download another model
|
| 98 |
+
print("\nβ Download sample model anyway? (y/n): ", end="")
|
| 99 |
+
try:
|
| 100 |
+
response = input().lower().strip()
|
| 101 |
+
if response in ['y', 'yes']:
|
| 102 |
+
download_sample_model()
|
| 103 |
+
else:
|
| 104 |
+
print("π Using existing models")
|
| 105 |
+
except (EOFError, KeyboardInterrupt):
|
| 106 |
+
print("\nπ Using existing models")
|
install_verify.py
ADDED
|
@@ -0,0 +1,123 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
#!/usr/bin/env python3
|
| 2 |
+
"""
|
| 3 |
+
Installation verification script for llama.cpp in Hugging Face Space
|
| 4 |
+
Run this to verify that llama.cpp is properly installed and configured
|
| 5 |
+
"""
|
| 6 |
+
|
| 7 |
+
import subprocess
|
| 8 |
+
import sys
|
| 9 |
+
import os
|
| 10 |
+
|
| 11 |
+
def run_command(command, description):
    """Run a shell command, print its outcome, and return success as a bool.

    Returns True when the command exits 0; False on non-zero exit, a 30 s
    timeout, or any unexpected error. stdout/stderr are echoed when non-empty.
    NOTE(review): `shell=True` is acceptable here because every caller passes
    a fixed internal command string — never untrusted input.
    """
    print(f"π {description}...")
    try:
        result = subprocess.run(command, shell=True, capture_output=True, text=True, timeout=30)
        if result.returncode == 0:
            print(f"✅ {description} - SUCCESS")
            if result.stdout.strip():
                print(f"   Output: {result.stdout.strip()}")
            return True
        else:
            print(f"❌ {description} - FAILED")
            if result.stderr.strip():
                print(f"   Error: {result.stderr.strip()}")
            return False
    except subprocess.TimeoutExpired:
        print(f"⏰ {description} - TIMEOUT")
        return False
    except Exception as e:
        print(f"❌ {description} - ERROR: {e}")
        return False
| 32 |
+
|
| 33 |
+
def check_python_version():
    """Check that the running interpreter is Python 3.8 or newer.

    Bug fix: the original test `major >= 3 and minor >= 8` would wrongly
    reject any future major release with minor < 8 (e.g. 4.0). Comparing
    `sys.version_info` against a tuple orders versions correctly.
    """
    version = sys.version_info
    print(f"π Python version: {version.major}.{version.minor}.{version.micro}")

    if version >= (3, 8):
        print("✅ Python version is compatible")
        return True
    else:
        print("❌ Python version should be 3.8 or higher")
        return False
| 44 |
+
|
| 45 |
+
def check_system_packages():
    """Verify the C/C++ build toolchain (gcc, g++, cmake, make) is on PATH."""
    required = ("gcc", "g++", "cmake", "make")
    outcomes = [
        run_command(f"which {tool}", f"Checking {tool}")
        for tool in required
    ]
    return all(outcomes)
| 55 |
+
|
| 56 |
+
def install_and_test_llamacpp():
    """Install llama-cpp-python via pip, then verify it imports cleanly.

    NOTE(review): run_command enforces a 30 s timeout; a from-source build
    of llama-cpp-python usually takes longer, so this step may report
    TIMEOUT on machines without a prebuilt wheel — confirm before relying
    on it in CI.
    """
    print("\nπ¦ Installing llama-cpp-python...")

    # Install llama-cpp-python
    install_success = run_command(
        f"{sys.executable} -m pip install llama-cpp-python --verbose",
        "Installing llama-cpp-python"
    )

    if not install_success:
        print("❌ Failed to install llama-cpp-python")
        return False

    # Test that the package imports in a fresh interpreter
    test_success = run_command(
        f"{sys.executable} -c 'from llama_cpp import Llama; print(\"Import successful\")'",
        "Testing llama-cpp-python import"
    )

    return test_success
| 77 |
+
|
| 78 |
+
def main():
    """Run every verification check, print a summary, return overall success."""
    print("π llama.cpp Installation Verification for Hugging Face Space")
    print("=" * 70)

    checks = [
        ("Python Version", check_python_version),
        ("System Packages", check_system_packages),
        ("llama-cpp-python Installation", install_and_test_llamacpp),
    ]

    results = []
    for check_name, check_func in checks:
        print(f"\nπ§ͺ Running: {check_name}")
        print("-" * 40)
        results.append(check_func())
        print()

    print("=" * 70)
    print("π VERIFICATION SUMMARY:")

    for (check_name, _), passed in zip(checks, results):
        status = "✅ PASSED" if passed else "❌ FAILED"
        print(f"   {check_name}: {status}")

    if all(results):
        print("\nπ ALL CHECKS PASSED!")
        print("✅ llama.cpp is successfully installed and ready to use.")
        print("\nπ Next steps:")
        print("   1. Run 'python test_llamacpp.py' to test the integration")
        print("   2. Start your Gradio app with 'python app.py'")
        print("   3. Upload a GGUF model file to enable full functionality")
    else:
        print("\n⚠️ SOME CHECKS FAILED!")
        print("❌ Please review the errors above and fix them before proceeding.")
        print("\nπ§ Common solutions:")
        print("   - Ensure build tools are installed (build-essential, cmake)")
        print("   - Check that you have sufficient memory and disk space")
        print("   - Try reinstalling with: pip install --force-reinstall llama-cpp-python")

    return all(results)
| 120 |
+
|
| 121 |
+
if __name__ == "__main__":
|
| 122 |
+
success = main()
|
| 123 |
+
sys.exit(0 if success else 1)
|
setup_and_run.py
ADDED
|
@@ -0,0 +1,108 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
#!/usr/bin/env python3
|
| 2 |
+
"""
|
| 3 |
+
Setup and run script for the llama.cpp Hugging Face Space
|
| 4 |
+
"""
|
| 5 |
+
|
| 6 |
+
import subprocess
|
| 7 |
+
import sys
|
| 8 |
+
import os
|
| 9 |
+
|
| 10 |
+
def install_dependencies():
    """Upgrade pip and install requirements.txt; return True on success."""
    print("π¦ Installing dependencies...")

    try:
        # Upgrade pip first
        subprocess.run([sys.executable, "-m", "pip", "install", "--upgrade", "pip"], check=True)

        # Install requirements
        subprocess.run([sys.executable, "-m", "pip", "install", "-r", "requirements.txt"], check=True)

        print("✅ Dependencies installed successfully!")
        return True

    except subprocess.CalledProcessError as e:
        print(f"❌ Error installing dependencies: {e}")
        return False
| 27 |
+
|
| 28 |
+
def test_installation():
    """Smoke-test that llama.cpp and the other dependencies import cleanly.

    Each import runs in a fresh interpreter so a broken native extension
    cannot take down this process. Returns True when all imports succeed.
    """
    print("π§ͺ Testing llama.cpp installation...")

    try:
        # Test the llama-cpp-python import first (most likely to fail)
        subprocess.run([sys.executable, "-c", "from llama_cpp import Llama; print('✅ llama-cpp-python imported successfully')"], check=True)

        # Test other dependencies
        test_imports = [
            "import gradio; print('✅ Gradio imported')",
            "import huggingface_hub; print('✅ Hugging Face Hub imported')",
            "from config import get_recommended_model; print('✅ Config imported')"
        ]

        for test_import in test_imports:
            subprocess.run([sys.executable, "-c", test_import], check=True)

        print("✅ All tests passed!")
        return True

    except subprocess.CalledProcessError as e:
        print(f"❌ Installation test failed: {e}")
        return False
| 52 |
+
|
| 53 |
+
def run_app():
    """Launch the Gradio app as a subprocess; blocks until it exits."""
    print("π Starting the Gradio app...")
    print("π Note: The Osmosis model will be downloaded on first use")
    print("π The app will be available at http://localhost:7860")
    print("βΉοΈ Press Ctrl+C to stop the app")

    try:
        subprocess.run([sys.executable, "app.py"], check=True)
    except KeyboardInterrupt:
        print("\nπ App stopped by user")
    except subprocess.CalledProcessError as e:
        print(f"❌ Error running app: {e}")
| 66 |
+
|
| 67 |
+
def main():
    """Full setup flow: version check, dependency install, smoke test, optional run."""
    print("π§ llama.cpp Hugging Face Space Setup")
    print("=" * 50)

    # Check Python version
    if sys.version_info < (3, 8):
        print("❌ Python 3.8 or higher is required")
        sys.exit(1)

    print(f"✅ Python version: {sys.version_info.major}.{sys.version_info.minor}.{sys.version_info.micro}")

    # Install dependencies
    if not install_dependencies():
        print("❌ Failed to install dependencies")
        sys.exit(1)

    # Test installation
    if not test_installation():
        print("❌ Installation test failed")
        sys.exit(1)

    print("\nπ Setup completed successfully!")
    print("\nπ What's installed:")
    print("   - llama-cpp-python for efficient CPU inference")
    print("   - Gradio for the web interface")
    print("   - Hugging Face Hub for model downloading")
    print("   - Osmosis Structure 0.6B model (will download on first use)")

    # Ask if user wants to run the app
    print("\nβ Would you like to run the app now? (y/n): ", end="")
    try:
        response = input().lower().strip()
        if response in ['y', 'yes']:
            run_app()
        else:
            print("π Setup complete! Run 'python app.py' when ready.")
    except (EOFError, KeyboardInterrupt):
        # Non-interactive environment — finish setup without launching
        print("\nπ Setup complete! Run 'python app.py' when ready.")
| 106 |
+
|
| 107 |
+
if __name__ == "__main__":
|
| 108 |
+
main()
|