// MiniMind / android/app/Mind2Model.kt
// MiniMind Max2 - Efficient MoE Language Model
package com.minimind.mind2
import android.content.Context
import kotlinx.coroutines.*
import kotlinx.coroutines.channels.trySendBlocking
import kotlinx.coroutines.flow.*
import java.io.File
/**
 * MiniMind (Mind2) Model Interface.
 *
 * Kotlin wrapper around native llama.cpp inference exposed through JNI
 * (the `mind2` shared library, loaded in the companion `init`).
 *
 * Usage: obtain the process-wide singleton via [getInstance], call [load]
 * once, then [generate] / [generateStream] / [chat] / [chatStream]; call
 * [release] when done. All blocking native work runs on [Dispatchers.IO].
 */
class Mind2Model private constructor() {

    // Model state. @Volatile: these are written on Dispatchers.IO and read
    // from arbitrary caller threads; without it, readers may see stale values.
    @Volatile
    private var isLoaded = false

    @Volatile
    private var modelPath: String? = null

    /**
     * Sampling / generation parameters.
     *
     * NOTE(review): [repeatPenalty] and [stopTokens] are currently NOT
     * forwarded to the native layer — the JNI signatures ([nativeGenerate],
     * [nativeGenerateStream]) do not carry them. They are kept here for
     * interface stability until the native side accepts them.
     *
     * @param maxTokens     maximum number of tokens to generate
     * @param temperature   softmax temperature (higher = more random)
     * @param topP          nucleus-sampling probability mass cutoff
     * @param topK          top-k sampling cutoff
     * @param repeatPenalty repetition penalty (unused by native layer, see note)
     * @param stopTokens    stop sequences (unused by native layer, see note)
     */
    data class GenerationConfig(
        val maxTokens: Int = 256,
        val temperature: Float = 0.7f,
        val topP: Float = 0.9f,
        val topK: Int = 40,
        val repeatPenalty: Float = 1.1f,
        val stopTokens: List<String> = listOf("<|endoftext|>", "<|im_end|>")
    )

    /**
     * Load the model, copying it out of the APK assets (`models/<modelName>`)
     * into the app's private files dir on first use.
     *
     * The asset is copied to a temporary file and then renamed into place, so
     * an interrupted copy never leaves a truncated file that a later
     * `exists()` check would wrongly trust.
     *
     * @param context       any Context; its filesDir/assets are used
     * @param modelName     GGUF file name under `assets/models/`
     * @param contextLength native context window size in tokens
     * @param threads       inference thread count; 0 = auto
     * @return [Result.success] when the native runtime initialized, otherwise
     *         a failure carrying the cause
     */
    suspend fun load(
        context: Context,
        modelName: String = "mind2-lite.gguf",
        contextLength: Int = 2048,
        threads: Int = 0 // 0 = auto
    ): Result<Unit> = withContext(Dispatchers.IO) {
        try {
            val assetPath = "models/$modelName"
            val modelFile = File(context.filesDir, modelName)
            if (!modelFile.exists()) {
                // Copy from assets via temp file + atomic-ish rename.
                val tmpFile = File(context.filesDir, "$modelName.tmp")
                try {
                    context.assets.open(assetPath).use { input ->
                        tmpFile.outputStream().use { output ->
                            input.copyTo(output)
                        }
                    }
                    if (!tmpFile.renameTo(modelFile)) {
                        throw RuntimeException("Failed to move model into place: $modelFile")
                    }
                } finally {
                    tmpFile.delete() // no-op after a successful rename
                }
            }
            modelPath = modelFile.absolutePath
            if (nativeInit(modelFile.absolutePath, contextLength, threads)) {
                isLoaded = true
                Result.success(Unit)
            } else {
                Result.failure(RuntimeException("Failed to load model"))
            }
        } catch (e: CancellationException) {
            // Never swallow cancellation into a Result.
            throw e
        } catch (e: Exception) {
            Result.failure(e)
        }
    }

    /**
     * Generate text for [prompt] in one shot (non-streaming).
     *
     * @return the full completion, or a failure with
     *         [IllegalStateException] when the model is not loaded.
     */
    suspend fun generate(
        prompt: String,
        config: GenerationConfig = GenerationConfig()
    ): Result<String> = withContext(Dispatchers.IO) {
        if (!isLoaded) {
            return@withContext Result.failure(IllegalStateException("Model not loaded"))
        }
        try {
            Result.success(
                nativeGenerate(
                    prompt,
                    config.maxTokens,
                    config.temperature,
                    config.topP,
                    config.topK
                )
            )
        } catch (e: CancellationException) {
            // Never swallow cancellation into a Result.
            throw e
        } catch (e: Exception) {
            Result.failure(e)
        }
    }

    /**
     * Generate text for [prompt], emitting tokens as they are produced.
     *
     * The flow fails with [IllegalStateException] if the model is not loaded,
     * and completes when the native side reports completion. Cancelling the
     * collector stops the native generation via [stop].
     */
    fun generateStream(
        prompt: String,
        config: GenerationConfig = GenerationConfig()
    ): Flow<String> = callbackFlow {
        if (!isLoaded) {
            // Deliver the error to the collector instead of throwing out of
            // the builder.
            close(IllegalStateException("Model not loaded"))
            return@callbackFlow
        }
        val callback = object : TokenCallback {
            override fun onToken(token: String) {
                // Block the native thread rather than silently dropping
                // tokens when the collector is slower than generation
                // (trySend drops on a full buffer). If the channel is
                // already closed — collector went away — abort generation.
                if (trySendBlocking(token).isFailure) {
                    nativeStop()
                }
            }

            override fun onComplete() {
                channel.close()
            }
        }
        try {
            // Blocks this IO thread until generation completes; tokens are
            // delivered through the callback above.
            nativeGenerateStream(
                prompt,
                config.maxTokens,
                config.temperature,
                config.topP,
                config.topK,
                callback
            )
        } catch (e: CancellationException) {
            throw e
        } catch (e: Exception) {
            // Surface native failures to the collector instead of losing them.
            close(e)
        }
        awaitClose { stop() }
    }.flowOn(Dispatchers.IO)

    /**
     * One-shot chat turn: wraps [message] and [history] in the ChatML prompt
     * template and delegates to [generate].
     */
    suspend fun chat(
        message: String,
        history: List<ChatMessage> = emptyList(),
        config: GenerationConfig = GenerationConfig()
    ): Result<String> = generate(buildChatPrompt(message, history), config)

    /**
     * Streaming chat turn: wraps [message] and [history] in the ChatML prompt
     * template and delegates to [generateStream].
     */
    fun chatStream(
        message: String,
        history: List<ChatMessage> = emptyList(),
        config: GenerationConfig = GenerationConfig()
    ): Flow<String> = generateStream(buildChatPrompt(message, history), config)

    // Builds a ChatML-formatted prompt: fixed system message, then prior
    // turns, then the current user message, ending with an open assistant
    // turn for the model to complete.
    private fun buildChatPrompt(message: String, history: List<ChatMessage>): String =
        buildString {
            append("<|im_start|>system\n")
            append("You are Mind2, a helpful AI assistant running locally on this device.\n")
            append("<|im_end|>\n")
            for (msg in history) {
                append("<|im_start|>${msg.role}\n")
                append("${msg.content}\n")
                append("<|im_end|>\n")
            }
            append("<|im_start|>user\n")
            append("$message\n")
            append("<|im_end|>\n")
            append("<|im_start|>assistant\n")
        }

    /**
     * Request that any ongoing generation stops as soon as possible.
     */
    fun stop() {
        nativeStop()
    }

    /**
     * Release native resources and mark the model unloaded. Any in-flight
     * generation should be [stop]ped first.
     */
    fun release() {
        nativeRelease()
        isLoaded = false
        modelPath = null
    }

    /**
     * Human-readable model info string from the native layer.
     */
    fun getInfo(): String = nativeGetInfo()

    /**
     * Benchmark inference speed over [tokens] tokens.
     *
     * @return tokens per second as reported by the native layer
     * @throws IllegalStateException if the model is not loaded
     */
    suspend fun benchmark(tokens: Int = 100): Float = withContext(Dispatchers.IO) {
        // Guard like every other entry point; a native call on an unloaded
        // model can crash the process rather than throw.
        check(isLoaded) { "Model not loaded" }
        nativeBenchmark(tokens)
    }

    // ---- JNI surface (signatures must match the native `mind2` library) ----

    private external fun nativeInit(modelPath: String, nCtx: Int, nThreads: Int): Boolean
    private external fun nativeGenerate(
        prompt: String,
        maxTokens: Int,
        temperature: Float,
        topP: Float,
        topK: Int
    ): String
    private external fun nativeGenerateStream(
        prompt: String,
        maxTokens: Int,
        temperature: Float,
        topP: Float,
        topK: Int,
        callback: TokenCallback
    )
    private external fun nativeStop()
    private external fun nativeRelease()
    private external fun nativeGetInfo(): String
    private external fun nativeBenchmark(nTokens: Int): Float

    /** Callback invoked from the native thread during streaming generation. */
    interface TokenCallback {
        fun onToken(token: String)
        fun onComplete()
    }

    /** One prior conversation turn. */
    data class ChatMessage(
        val role: String, // "user" or "assistant"
        val content: String
    )

    companion object {
        init {
            System.loadLibrary("mind2")
        }

        // @Volatile is required for the double-checked locking below to be
        // correct: without it another thread may observe a partially
        // published instance.
        @Volatile
        private var instance: Mind2Model? = null

        /** Process-wide singleton accessor (thread-safe). */
        @JvmStatic
        fun getInstance(): Mind2Model {
            return instance ?: synchronized(this) {
                instance ?: Mind2Model().also { instance = it }
            }
        }
    }
}
/**
 * Convenience extension: fetch the shared [Mind2Model] singleton and load it
 * from this [Context]'s assets/files dir.
 *
 * @param modelName     GGUF file name under `assets/models/`
 * @param contextLength native context window size in tokens
 * @param threads       inference thread count; 0 = auto (new, defaulted —
 *                      existing call sites are unaffected)
 * @return the loaded singleton on success, or the load failure
 */
suspend fun Context.loadMind2Model(
    modelName: String = "mind2-lite.gguf",
    contextLength: Int = 2048,
    threads: Int = 0
): Result<Mind2Model> {
    val model = Mind2Model.getInstance()
    return model.load(this, modelName, contextLength, threads).map { model }
}