Spaces:
Runtime error
Runtime error
Updated
Browse files- .gitattributes +2 -0
- README.md +17 -12
- app.py +69 -39
- config.py +68 -0
- download_model.py +106 -0
- install_verify.py +123 -0
- setup_and_run.py +108 -0
.gitattributes
CHANGED
|
@@ -33,3 +33,5 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
|
|
| 33 |
*.zip filter=lfs diff=lfs merge=lfs -text
|
| 34 |
*.zst filter=lfs diff=lfs merge=lfs -text
|
| 35 |
*tfevents* filter=lfs diff=lfs merge=lfs -text
|
|
|
|
|
|
|
|
|
| 33 |
*.zip filter=lfs diff=lfs merge=lfs -text
|
| 34 |
*.zst filter=lfs diff=lfs merge=lfs -text
|
| 35 |
*tfevents* filter=lfs diff=lfs merge=lfs -text
|
| 36 |
+
*.gguf filter=lfs diff=lfs merge=lfs -text
|
| 37 |
+
*.ggml filter=lfs diff=lfs merge=lfs -text
|
README.md
CHANGED
|
@@ -12,15 +12,16 @@ short_description: Plain text to json using llama.cpp
|
|
| 12 |
|
| 13 |
# Plain Text to JSON with llama.cpp
|
| 14 |
|
| 15 |
-
This Hugging Face Space converts plain text into structured JSON format using llama.cpp for efficient CPU inference.
|
| 16 |
|
| 17 |
## Features
|
| 18 |
|
| 19 |
-
- **llama.cpp Integration**: Uses llama-cpp-python for efficient model inference
|
|
|
|
| 20 |
- **Gradio Interface**: User-friendly web interface
|
| 21 |
-
- **JSON Conversion**: Converts unstructured text to
|
| 22 |
-
- **
|
| 23 |
-
- **Demo Mode**: Basic functionality without requiring
|
| 24 |
|
| 25 |
## Setup
|
| 26 |
|
|
@@ -28,18 +29,22 @@ The space automatically installs:
|
|
| 28 |
- `llama-cpp-python` for llama.cpp integration
|
| 29 |
- Required build tools (`build-essential`, `cmake`)
|
| 30 |
- Gradio and other dependencies
|
|
|
|
| 31 |
|
| 32 |
## Usage
|
| 33 |
|
| 34 |
-
1. **
|
| 35 |
-
2. **
|
| 36 |
-
3. **
|
|
|
|
| 37 |
|
| 38 |
-
## Model
|
| 39 |
|
| 40 |
-
-
|
| 41 |
-
-
|
| 42 |
-
-
|
|
|
|
|
|
|
| 43 |
|
| 44 |
## Configuration
|
| 45 |
|
|
|
|
| 12 |
|
| 13 |
# Plain Text to JSON with llama.cpp
|
| 14 |
|
| 15 |
+
This Hugging Face Space converts plain text into structured JSON format using llama.cpp for efficient CPU inference, powered by the Osmosis Structure 0.6B model.
|
| 16 |
|
| 17 |
## Features
|
| 18 |
|
| 19 |
+
- **llama.cpp Integration**: Uses llama-cpp-python for efficient CPU model inference
|
| 20 |
+
- **Osmosis Structure Model**: Specialized 0.6B parameter model for structured data extraction
|
| 21 |
- **Gradio Interface**: User-friendly web interface
|
| 22 |
+
- **JSON Conversion**: Converts unstructured text to well-formatted JSON
|
| 23 |
+
- **Auto-Download**: Automatically downloads the Osmosis model on first use
|
| 24 |
+
- **Demo Mode**: Basic functionality without requiring the AI model
|
| 25 |
|
| 26 |
## Setup
|
| 27 |
|
|
|
|
| 29 |
- `llama-cpp-python` for llama.cpp integration
|
| 30 |
- Required build tools (`build-essential`, `cmake`)
|
| 31 |
- Gradio and other dependencies
|
| 32 |
+
- Downloads Osmosis Structure 0.6B model (~1.2GB) on first use
|
| 33 |
|
| 34 |
## Usage
|
| 35 |
|
| 36 |
+
1. **Quick Start**: Run `python setup_and_run.py` for automated setup
|
| 37 |
+
2. **Demo Mode**: Use "Demo (No Model)" for basic text-to-JSON conversion
|
| 38 |
+
3. **Full Mode**: Click "Load Model" to download and use the Osmosis model
|
| 39 |
+
4. **Customize**: Adjust temperature and max_tokens for different output styles
|
| 40 |
|
| 41 |
+
## Model Details
|
| 42 |
|
| 43 |
+
- **Model**: Osmosis Structure 0.6B BF16 GGUF
|
| 44 |
+
- **Repository**: https://huggingface.co/osmosis-ai/Osmosis-Structure-0.6B
|
| 45 |
+
- **Specialization**: Structure extraction and JSON generation
|
| 46 |
+
- **Size**: ~1.2GB download
|
| 47 |
+
- **Format**: GGUF (optimized for llama.cpp)
|
| 48 |
|
| 49 |
## Configuration
|
| 50 |
|
app.py
CHANGED
|
@@ -3,6 +3,7 @@ import json
|
|
| 3 |
from llama_cpp import Llama
|
| 4 |
import os
|
| 5 |
from huggingface_hub import hf_hub_download
|
|
|
|
| 6 |
|
| 7 |
# Global variable to store the model
|
| 8 |
llm = None
|
|
@@ -11,66 +12,94 @@ def load_model():
|
|
| 11 |
"""Load the llama.cpp model"""
|
| 12 |
global llm
|
| 13 |
try:
|
| 14 |
-
|
| 15 |
-
|
| 16 |
-
|
| 17 |
-
|
| 18 |
-
|
| 19 |
-
|
| 20 |
-
|
| 21 |
-
|
| 22 |
-
|
| 23 |
-
#
|
| 24 |
-
|
| 25 |
-
|
| 26 |
-
|
| 27 |
-
|
| 28 |
-
|
| 29 |
-
|
| 30 |
-
|
| 31 |
-
|
| 32 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 33 |
|
| 34 |
except Exception as e:
|
| 35 |
-
|
| 36 |
-
|
|
|
|
| 37 |
|
| 38 |
def text_to_json(input_text, max_tokens=512, temperature=0.7):
|
| 39 |
"""Convert plain text to structured JSON using llama.cpp"""
|
| 40 |
global llm
|
| 41 |
|
| 42 |
if llm is None:
|
| 43 |
-
return
|
| 44 |
|
| 45 |
try:
|
| 46 |
-
# Create a prompt for
|
| 47 |
-
prompt = f"""
|
|
|
|
|
|
|
|
|
|
|
|
|
| 48 |
|
| 49 |
-
|
| 50 |
|
| 51 |
-
|
|
|
|
| 52 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 53 |
# Generate response using llama.cpp
|
| 54 |
response = llm(
|
| 55 |
prompt,
|
| 56 |
-
|
| 57 |
-
temperature=temperature,
|
| 58 |
-
stop=["```", "\n\n\n"],
|
| 59 |
echo=False
|
| 60 |
)
|
| 61 |
|
| 62 |
generated_text = response['choices'][0]['text'].strip()
|
| 63 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 64 |
# Try to parse as JSON to validate
|
| 65 |
try:
|
| 66 |
parsed_json = json.loads(generated_text)
|
| 67 |
return json.dumps(parsed_json, indent=2)
|
| 68 |
except json.JSONDecodeError:
|
| 69 |
-
# If not valid JSON, return as
|
| 70 |
-
return generated_text
|
| 71 |
|
| 72 |
except Exception as e:
|
| 73 |
-
return f"Error generating JSON: {str(e)}"
|
| 74 |
|
| 75 |
def demo_without_model(input_text):
|
| 76 |
"""Demo function that works without loading a model"""
|
|
@@ -99,7 +128,7 @@ def demo_without_model(input_text):
|
|
| 99 |
# Create Gradio interface
|
| 100 |
with gr.Blocks(title="Plain Text to JSON with llama.cpp") as demo:
|
| 101 |
gr.Markdown("# Plain Text to JSON Converter")
|
| 102 |
-
gr.Markdown("Convert plain text into structured JSON format using llama.cpp")
|
| 103 |
|
| 104 |
with gr.Tab("Text to JSON"):
|
| 105 |
with gr.Row():
|
|
@@ -144,14 +173,15 @@ with gr.Blocks(title="Plain Text to JSON with llama.cpp") as demo:
|
|
| 144 |
|
| 145 |
gr.Markdown("""
|
| 146 |
### Instructions:
|
| 147 |
-
1. Click "Load Model" to initialize
|
| 148 |
-
2. Use "Demo (No Model)" for basic functionality without loading
|
| 149 |
-
3.
|
| 150 |
|
| 151 |
### Notes:
|
| 152 |
-
-
|
| 153 |
-
-
|
| 154 |
-
-
|
|
|
|
| 155 |
""")
|
| 156 |
|
| 157 |
# Event handlers
|
|
|
|
| 3 |
from llama_cpp import Llama
|
| 4 |
import os
|
| 5 |
from huggingface_hub import hf_hub_download
|
| 6 |
+
from config import get_model_config, get_generation_config, get_recommended_model
|
| 7 |
|
| 8 |
# Global variable to store the model
|
| 9 |
llm = None
|
|
|
|
| 12 |
"""Load the llama.cpp model"""
|
| 13 |
global llm
|
| 14 |
try:
|
| 15 |
+
print("Loading Osmosis Structure model...")
|
| 16 |
+
|
| 17 |
+
# Get model info and config
|
| 18 |
+
model_info = get_recommended_model()
|
| 19 |
+
model_config = get_model_config()
|
| 20 |
+
|
| 21 |
+
# Create models directory
|
| 22 |
+
os.makedirs("./models", exist_ok=True)
|
| 23 |
+
|
| 24 |
+
# Download the Osmosis model
|
| 25 |
+
print(f"Downloading {model_info['name']} ({model_info['size']})...")
|
| 26 |
+
model_path = hf_hub_download(
|
| 27 |
+
repo_id=model_info['repo_id'],
|
| 28 |
+
filename=model_info['filename'],
|
| 29 |
+
cache_dir="./models",
|
| 30 |
+
resume_download=True
|
| 31 |
+
)
|
| 32 |
+
|
| 33 |
+
print(f"Model downloaded to: {model_path}")
|
| 34 |
+
print("Initializing llama.cpp...")
|
| 35 |
+
|
| 36 |
+
# Initialize llama.cpp with the downloaded model
|
| 37 |
+
llm = Llama(
|
| 38 |
+
model_path=model_path,
|
| 39 |
+
**model_config
|
| 40 |
+
)
|
| 41 |
+
|
| 42 |
+
print("β
Osmosis Structure model loaded successfully!")
|
| 43 |
+
return f"β
Model loaded: {model_info['name']}\nPath: {model_path}\nDescription: {model_info['description']}"
|
| 44 |
|
| 45 |
except Exception as e:
|
| 46 |
+
error_msg = f"β Error loading model: {e}"
|
| 47 |
+
print(error_msg)
|
| 48 |
+
return error_msg
|
| 49 |
|
| 50 |
def text_to_json(input_text, max_tokens=512, temperature=0.7):
    """Convert plain text to structured JSON using llama.cpp.

    Args:
        input_text: Unstructured text to convert.
        max_tokens: Generation length cap passed to the model.
        temperature: Sampling temperature passed to the model.

    Returns:
        Pretty-printed JSON on success, or a diagnostic string when the
        model is not loaded, the output is not valid JSON, or generation fails.
    """
    global llm

    if llm is None:
        return "❌ Model not loaded. Please load the model first."

    try:
        # Structured chat-style prompt; trailing ```json nudges the model
        # to emit a fenced JSON block we can parse.
        prompt = f"""<|system|>
You are a helpful assistant that converts unstructured text into well-formatted JSON. Extract key information and organize it into a logical structure.

<|user|>
Convert this text to JSON format:

{input_text}

<|assistant|>
```json"""

        # Start from the shared generation defaults, then apply user settings
        gen_config = get_generation_config()
        gen_config.update({
            "max_tokens": max_tokens,
            "temperature": temperature
        })

        # Generate response using llama.cpp
        response = llm(
            prompt,
            **gen_config,
            echo=False
        )

        generated_text = response['choices'][0]['text'].strip()

        # Strip markdown code-fence remnants if the model echoed them
        if generated_text.startswith('```json'):
            generated_text = generated_text[7:]
        if generated_text.endswith('```'):
            generated_text = generated_text[:-3]
        generated_text = generated_text.strip()

        # Validate by round-tripping through the JSON parser
        try:
            parsed_json = json.loads(generated_text)
            return json.dumps(parsed_json, indent=2)
        except json.JSONDecodeError:
            # Not valid JSON — hand it back with a warning prefix
            return f"Generated (may need cleanup):\n{generated_text}"

    except Exception as e:
        return f"❌ Error generating JSON: {str(e)}"
| 103 |
|
| 104 |
def demo_without_model(input_text):
|
| 105 |
"""Demo function that works without loading a model"""
|
|
|
|
| 128 |
# Create Gradio interface
|
| 129 |
with gr.Blocks(title="Plain Text to JSON with llama.cpp") as demo:
|
| 130 |
gr.Markdown("# Plain Text to JSON Converter")
|
| 131 |
+
gr.Markdown("Convert plain text into structured JSON format using llama.cpp and Osmosis Structure model")
|
| 132 |
|
| 133 |
with gr.Tab("Text to JSON"):
|
| 134 |
with gr.Row():
|
|
|
|
| 173 |
|
| 174 |
gr.Markdown("""
|
| 175 |
### Instructions:
|
| 176 |
+
1. Click "Load Model" to download and initialize the Osmosis Structure model
|
| 177 |
+
2. Use "Demo (No Model)" for basic functionality without loading the AI model
|
| 178 |
+
3. The Osmosis model is optimized for structured data extraction and JSON generation
|
| 179 |
|
| 180 |
### Notes:
|
| 181 |
+
- Uses llama.cpp for efficient CPU inference
|
| 182 |
+
- Osmosis Structure 0.6B model (~1.2GB) will be downloaded automatically
|
| 183 |
+
- Model is specialized for converting unstructured text to structured formats
|
| 184 |
+
- Adjust max_tokens and temperature for different output styles
|
| 185 |
""")
|
| 186 |
|
| 187 |
# Event handlers
|
config.py
CHANGED
|
@@ -0,0 +1,68 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
"""
|
| 2 |
+
Configuration settings for llama.cpp in Hugging Face Space
|
| 3 |
+
"""
|
| 4 |
+
|
| 5 |
+
import os
|
| 6 |
+
|
| 7 |
+
# Model configuration
|
| 8 |
+
MODEL_CONFIG = {
|
| 9 |
+
"n_ctx": 2048, # Context window size
|
| 10 |
+
"n_threads": 2, # Number of threads (conservative for HF Spaces)
|
| 11 |
+
"n_batch": 8, # Batch size for prompt processing
|
| 12 |
+
"use_mmap": True, # Use memory mapping for model files
|
| 13 |
+
"use_mlock": False, # Don't lock model in memory (saves RAM)
|
| 14 |
+
"verbose": False, # Reduce logging in production
|
| 15 |
+
}
|
| 16 |
+
|
| 17 |
+
# Generation defaults
|
| 18 |
+
GENERATION_CONFIG = {
|
| 19 |
+
"temperature": 0.7,
|
| 20 |
+
"top_p": 0.9,
|
| 21 |
+
"top_k": 40,
|
| 22 |
+
"repeat_penalty": 1.1,
|
| 23 |
+
"stop": ["```", "\n\n\n", "Human:", "Assistant:"],
|
| 24 |
+
}
|
| 25 |
+
|
| 26 |
+
# Hugging Face Space specific settings
|
| 27 |
+
HF_SPACE_CONFIG = {
|
| 28 |
+
"max_memory_usage": "2GB", # Conservative memory usage
|
| 29 |
+
"timeout_seconds": 30, # Request timeout
|
| 30 |
+
"enable_cpu_only": True, # Force CPU inference
|
| 31 |
+
}
|
| 32 |
+
|
| 33 |
+
# Model download settings
|
| 34 |
+
MODEL_DOWNLOAD_CONFIG = {
|
| 35 |
+
"cache_dir": "./models",
|
| 36 |
+
"use_auth_token": os.getenv("HF_TOKEN", None),
|
| 37 |
+
"resume_download": True,
|
| 38 |
+
}
|
| 39 |
+
|
| 40 |
+
# Recommended small GGUF models for demonstration
|
| 41 |
+
RECOMMENDED_MODELS = [
|
| 42 |
+
{
|
| 43 |
+
"name": "Osmosis-Structure-0.6B",
|
| 44 |
+
"repo_id": "osmosis-ai/Osmosis-Structure-0.6B",
|
| 45 |
+
"filename": "Osmosis-Structure-0.6B-BF16.gguf",
|
| 46 |
+
"size": "~1.2GB",
|
| 47 |
+
"description": "Osmosis AI structure-focused model for JSON generation"
|
| 48 |
+
},
|
| 49 |
+
{
|
| 50 |
+
"name": "TinyLlama-1.1B-Chat-v1.0-GGUF",
|
| 51 |
+
"repo_id": "TheBloke/TinyLlama-1.1B-Chat-v1.0-GGUF",
|
| 52 |
+
"filename": "tinyllama-1.1b-chat-v1.0.q4_k_m.gguf",
|
| 53 |
+
"size": "~700MB",
|
| 54 |
+
"description": "Small, fast model good for testing"
|
| 55 |
+
}
|
| 56 |
+
]
|
| 57 |
+
|
| 58 |
+
def get_model_config():
|
| 59 |
+
"""Get model configuration optimized for HF Spaces"""
|
| 60 |
+
return MODEL_CONFIG.copy()
|
| 61 |
+
|
| 62 |
+
def get_generation_config():
|
| 63 |
+
"""Get generation configuration"""
|
| 64 |
+
return GENERATION_CONFIG.copy()
|
| 65 |
+
|
| 66 |
+
def get_recommended_model():
|
| 67 |
+
"""Get the recommended model for this space"""
|
| 68 |
+
return RECOMMENDED_MODELS[0] # Return TinyLlama as default
|
download_model.py
ADDED
|
@@ -0,0 +1,106 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
#!/usr/bin/env python3
|
| 2 |
+
"""
|
| 3 |
+
Download a sample GGUF model for testing llama.cpp integration
|
| 4 |
+
"""
|
| 5 |
+
|
| 6 |
+
import os
|
| 7 |
+
from huggingface_hub import hf_hub_download
|
| 8 |
+
from config import get_recommended_model, MODEL_DOWNLOAD_CONFIG
|
| 9 |
+
|
| 10 |
+
def download_sample_model():
    """Download the recommended small GGUF model for testing.

    Returns:
        The local filesystem path of the downloaded model, or None when
        the download failed (the error is printed, not raised).
    """
    model_info = get_recommended_model()

    print(f"π₯ Downloading {model_info['name']}...")
    print(f"   Repository: {model_info['repo_id']}")
    print(f"   File: {model_info['filename']}")
    print(f"   Size: {model_info['size']}")
    print(f"   Description: {model_info['description']}")

    try:
        # Create models directory if it doesn't exist
        os.makedirs(MODEL_DOWNLOAD_CONFIG['cache_dir'], exist_ok=True)

        # Download the model. `resume_download` is deprecated in recent
        # huggingface_hub releases (resuming is the default), so it is
        # no longer forwarded.
        model_path = hf_hub_download(
            repo_id=model_info['repo_id'],
            filename=model_info['filename'],
            cache_dir=MODEL_DOWNLOAD_CONFIG['cache_dir'],
            token=MODEL_DOWNLOAD_CONFIG['use_auth_token']
        )

        print("✅ Model downloaded successfully!")
        print(f"   Path: {model_path}")

        # Create a symlink in the models directory for easy access
        symlink_path = os.path.join(MODEL_DOWNLOAD_CONFIG['cache_dir'], "model.gguf")
        if os.path.exists(symlink_path):
            os.remove(symlink_path)

        try:
            os.symlink(model_path, symlink_path)
            print(f"   Symlink created: {symlink_path}")
        except OSError:
            # Symlinks might not work on all systems, just report the path
            print(f"   Use this path in your code: {model_path}")

        return model_path

    except Exception as e:
        print(f"❌ Error downloading model: {e}")
        print("π‘ You can manually download a GGUF model and place it in ./models/")
        return None
| 55 |
+
def list_available_models():
    """Return metadata dicts for every GGUF/GGML file in the models directory.

    Each entry has keys 'name', 'path' and 'size_mb'. Also prints a short
    human-readable listing as a side effect.
    """
    models_dir = MODEL_DOWNLOAD_CONFIG['cache_dir']

    if not os.path.exists(models_dir):
        print(f"π Models directory doesn't exist: {models_dir}")
        return []

    discovered = []
    for entry in os.listdir(models_dir):
        if entry.endswith(('.gguf', '.ggml')):
            full_path = os.path.join(models_dir, entry)
            discovered.append({
                'name': entry,
                'path': full_path,
                'size_mb': os.path.getsize(full_path) / (1024 * 1024),
            })

    if not discovered:
        print("π No GGUF/GGML models found in models directory")
    else:
        print("π Available models:")
        for info in discovered:
            print(f"   - {info['name']} ({info['size_mb']:.1f} MB)")

    return discovered
|
| 83 |
+
if __name__ == "__main__":
|
| 84 |
+
print("π€ Model Download Utility for llama.cpp")
|
| 85 |
+
print("=" * 50)
|
| 86 |
+
|
| 87 |
+
# List existing models
|
| 88 |
+
print("\nπ Checking for existing models...")
|
| 89 |
+
existing_models = list_available_models()
|
| 90 |
+
|
| 91 |
+
if not existing_models:
|
| 92 |
+
print("\nπ₯ No models found. Downloading sample model...")
|
| 93 |
+
download_sample_model()
|
| 94 |
+
else:
|
| 95 |
+
print(f"\nβ
Found {len(existing_models)} existing model(s)")
|
| 96 |
+
|
| 97 |
+
# Ask if user wants to download another model
|
| 98 |
+
print("\nβ Download sample model anyway? (y/n): ", end="")
|
| 99 |
+
try:
|
| 100 |
+
response = input().lower().strip()
|
| 101 |
+
if response in ['y', 'yes']:
|
| 102 |
+
download_sample_model()
|
| 103 |
+
else:
|
| 104 |
+
print("π Using existing models")
|
| 105 |
+
except (EOFError, KeyboardInterrupt):
|
| 106 |
+
print("\nπ Using existing models")
|
install_verify.py
ADDED
|
@@ -0,0 +1,123 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
#!/usr/bin/env python3
|
| 2 |
+
"""
|
| 3 |
+
Installation verification script for llama.cpp in Hugging Face Space
|
| 4 |
+
Run this to verify that llama.cpp is properly installed and configured
|
| 5 |
+
"""
|
| 6 |
+
|
| 7 |
+
import subprocess
|
| 8 |
+
import sys
|
| 9 |
+
import os
|
| 10 |
+
|
| 11 |
+
def run_command(command, description):
    """Run a shell command, print its outcome, and return success as a bool.

    Returns True when the command exits 0; False on non-zero exit, a 30 s
    timeout, or any unexpected error. stdout/stderr are echoed when non-empty.
    NOTE(review): `shell=True` is acceptable here because every caller passes
    a fixed internal command string — never untrusted input.
    """
    print(f"π {description}...")
    try:
        result = subprocess.run(command, shell=True, capture_output=True, text=True, timeout=30)
        if result.returncode == 0:
            print(f"✅ {description} - SUCCESS")
            if result.stdout.strip():
                print(f"   Output: {result.stdout.strip()}")
            return True
        else:
            print(f"❌ {description} - FAILED")
            if result.stderr.strip():
                print(f"   Error: {result.stderr.strip()}")
            return False
    except subprocess.TimeoutExpired:
        print(f"⏰ {description} - TIMEOUT")
        return False
    except Exception as e:
        print(f"❌ {description} - ERROR: {e}")
        return False
| 32 |
+
|
| 33 |
+
def check_python_version():
    """Check that the running interpreter is Python 3.8 or newer.

    Bug fix: the original test `major >= 3 and minor >= 8` would wrongly
    reject any future major release with minor < 8 (e.g. 4.0). Comparing
    `sys.version_info` against a tuple orders versions correctly.
    """
    version = sys.version_info
    print(f"π Python version: {version.major}.{version.minor}.{version.micro}")

    if version >= (3, 8):
        print("✅ Python version is compatible")
        return True
    else:
        print("❌ Python version should be 3.8 or higher")
        return False
| 44 |
+
|
| 45 |
+
def check_system_packages():
    """Verify the C/C++ build toolchain (gcc, g++, cmake, make) is on PATH."""
    required = ("gcc", "g++", "cmake", "make")
    outcomes = [
        run_command(f"which {tool}", f"Checking {tool}")
        for tool in required
    ]
    return all(outcomes)
| 55 |
+
|
| 56 |
+
def install_and_test_llamacpp():
    """Install llama-cpp-python via pip, then verify it imports cleanly.

    NOTE(review): run_command enforces a 30 s timeout; a from-source build
    of llama-cpp-python usually takes longer, so this step may report
    TIMEOUT on machines without a prebuilt wheel — confirm before relying
    on it in CI.
    """
    print("\nπ¦ Installing llama-cpp-python...")

    # Install llama-cpp-python
    install_success = run_command(
        f"{sys.executable} -m pip install llama-cpp-python --verbose",
        "Installing llama-cpp-python"
    )

    if not install_success:
        print("❌ Failed to install llama-cpp-python")
        return False

    # Test that the package imports in a fresh interpreter
    test_success = run_command(
        f"{sys.executable} -c 'from llama_cpp import Llama; print(\"Import successful\")'",
        "Testing llama-cpp-python import"
    )

    return test_success
| 77 |
+
|
| 78 |
+
def main():
    """Run every verification check, print a summary, return overall success."""
    print("π llama.cpp Installation Verification for Hugging Face Space")
    print("=" * 70)

    checks = [
        ("Python Version", check_python_version),
        ("System Packages", check_system_packages),
        ("llama-cpp-python Installation", install_and_test_llamacpp),
    ]

    results = []
    for check_name, check_func in checks:
        print(f"\nπ§ͺ Running: {check_name}")
        print("-" * 40)
        results.append(check_func())
        print()

    print("=" * 70)
    print("π VERIFICATION SUMMARY:")

    for (check_name, _), passed in zip(checks, results):
        status = "✅ PASSED" if passed else "❌ FAILED"
        print(f"   {check_name}: {status}")

    if all(results):
        print("\nπ ALL CHECKS PASSED!")
        print("✅ llama.cpp is successfully installed and ready to use.")
        print("\nπ Next steps:")
        print("   1. Run 'python test_llamacpp.py' to test the integration")
        print("   2. Start your Gradio app with 'python app.py'")
        print("   3. Upload a GGUF model file to enable full functionality")
    else:
        print("\n⚠️ SOME CHECKS FAILED!")
        print("❌ Please review the errors above and fix them before proceeding.")
        print("\nπ§ Common solutions:")
        print("   - Ensure build tools are installed (build-essential, cmake)")
        print("   - Check that you have sufficient memory and disk space")
        print("   - Try reinstalling with: pip install --force-reinstall llama-cpp-python")

    return all(results)
| 120 |
+
|
| 121 |
+
if __name__ == "__main__":
|
| 122 |
+
success = main()
|
| 123 |
+
sys.exit(0 if success else 1)
|
setup_and_run.py
ADDED
|
@@ -0,0 +1,108 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
#!/usr/bin/env python3
|
| 2 |
+
"""
|
| 3 |
+
Setup and run script for the llama.cpp Hugging Face Space
|
| 4 |
+
"""
|
| 5 |
+
|
| 6 |
+
import subprocess
|
| 7 |
+
import sys
|
| 8 |
+
import os
|
| 9 |
+
|
| 10 |
+
def install_dependencies():
    """Upgrade pip and install requirements.txt; return True on success."""
    print("π¦ Installing dependencies...")

    try:
        # Upgrade pip first
        subprocess.run([sys.executable, "-m", "pip", "install", "--upgrade", "pip"], check=True)

        # Install requirements
        subprocess.run([sys.executable, "-m", "pip", "install", "-r", "requirements.txt"], check=True)

        print("✅ Dependencies installed successfully!")
        return True

    except subprocess.CalledProcessError as e:
        print(f"❌ Error installing dependencies: {e}")
        return False
| 27 |
+
|
| 28 |
+
def test_installation():
    """Smoke-test that llama.cpp and the other dependencies import cleanly.

    Each import runs in a fresh interpreter so a broken native extension
    cannot take down this process. Returns True when all imports succeed.
    """
    print("π§ͺ Testing llama.cpp installation...")

    try:
        # Test the llama-cpp-python import first (most likely to fail)
        subprocess.run([sys.executable, "-c", "from llama_cpp import Llama; print('✅ llama-cpp-python imported successfully')"], check=True)

        # Test other dependencies
        test_imports = [
            "import gradio; print('✅ Gradio imported')",
            "import huggingface_hub; print('✅ Hugging Face Hub imported')",
            "from config import get_recommended_model; print('✅ Config imported')"
        ]

        for test_import in test_imports:
            subprocess.run([sys.executable, "-c", test_import], check=True)

        print("✅ All tests passed!")
        return True

    except subprocess.CalledProcessError as e:
        print(f"❌ Installation test failed: {e}")
        return False
| 52 |
+
|
| 53 |
+
def run_app():
    """Launch the Gradio app as a subprocess; blocks until it exits."""
    print("π Starting the Gradio app...")
    print("π Note: The Osmosis model will be downloaded on first use")
    print("π The app will be available at http://localhost:7860")
    print("βΉοΈ Press Ctrl+C to stop the app")

    try:
        subprocess.run([sys.executable, "app.py"], check=True)
    except KeyboardInterrupt:
        print("\nπ App stopped by user")
    except subprocess.CalledProcessError as e:
        print(f"❌ Error running app: {e}")
| 66 |
+
|
| 67 |
+
def main():
    """Full setup flow: version check, dependency install, smoke test, optional run."""
    print("π§ llama.cpp Hugging Face Space Setup")
    print("=" * 50)

    # Check Python version
    if sys.version_info < (3, 8):
        print("❌ Python 3.8 or higher is required")
        sys.exit(1)

    print(f"✅ Python version: {sys.version_info.major}.{sys.version_info.minor}.{sys.version_info.micro}")

    # Install dependencies
    if not install_dependencies():
        print("❌ Failed to install dependencies")
        sys.exit(1)

    # Test installation
    if not test_installation():
        print("❌ Installation test failed")
        sys.exit(1)

    print("\nπ Setup completed successfully!")
    print("\nπ What's installed:")
    print("   - llama-cpp-python for efficient CPU inference")
    print("   - Gradio for the web interface")
    print("   - Hugging Face Hub for model downloading")
    print("   - Osmosis Structure 0.6B model (will download on first use)")

    # Ask if user wants to run the app
    print("\nβ Would you like to run the app now? (y/n): ", end="")
    try:
        response = input().lower().strip()
        if response in ['y', 'yes']:
            run_app()
        else:
            print("π Setup complete! Run 'python app.py' when ready.")
    except (EOFError, KeyboardInterrupt):
        # Non-interactive environment — finish setup without launching
        print("\nπ Setup complete! Run 'python app.py' when ready.")
| 106 |
+
|
| 107 |
+
if __name__ == "__main__":
|
| 108 |
+
main()
|