Safetensors
llama

Only about 30 notes can be generated

#3
by tatookan - opened

I tried many scripts with AI, but all of them could only generate around 30 notes

'''
import torch
from transformers import pipeline
import symusic

# Announce the load, then build the text-generation pipeline for the model.
print("🚀 正在加载 dx2102/llama-midi 模型...")

# Weights are loaded as bfloat16; CUDA is used when torch reports it available,
# otherwise the pipeline runs on the CPU.
pipe = pipeline(
    task="text-generation",
    model="dx2102/llama-midi",
    device="cuda" if torch.cuda.is_available() else "cpu",
    torch_dtype=torch.bfloat16,
)

print("✅ 模型加载完成!")

# Use a strongly worded prompt to try to generate a long piece in one go.
# NOTE(review): the model author's reply in this thread states that, in the
# training data, the title is always a single line, followed by a blank line
# and then the column-header line. The four-line title below departs from that
# and may make the model stop early; consider collapsing it to one line.
powerful_prompt = '''Bach style complete Baroque fugue with extensive development
Generate a full musical composition with 500+ notes
Continue with proper counterpoint and harmonic progression
Do not stop early - complete the entire musical piece

pitch duration wait velocity instrument
62 480 0 72 0
65 480 480 72 0
69 960 960 72 0
71 480 0 72 0
74 480 480 72 0
77 960 960 72 0
'''

print("🎹 正在一次性生成完整音乐...")

# Generate a large amount of content in a single call.
result = pipe(
    powerful_prompt.strip(),
    # Greatly increased generation length.
    # NOTE: in transformers, max_length counts the prompt tokens as well as
    # the newly generated ones.
    max_length=4000,
    temperature=0.9,
    top_p=0.95,
    do_sample=True,
    repetition_penalty=1.1,
    num_return_sequences=1,
)

# The pipeline returns a list; the text of the first (only) sequence is used.
generated_text = result[0]['generated_text']
print(f"📊 生成文本长度: {len(generated_text)} 字符")

# Show a preview of the generated output.
print("\n📋 生成预览:")
lines = generated_text.split('\n')
# Keep only non-blank lines whose first whitespace-separated field is numeric.
data_lines = [line for line in lines if line.strip() and line.split()[0].isdigit()]
# Print at most the first 15 data rows, numbered from 1.
for i, line in enumerate(data_lines[:15]):
    print(f" {i+1:2d}: {line}")
if len(data_lines) > 15:
    print(f" ... (还有 {len(data_lines)-15} 行)")

# Post-processing: convert the generated note table into a MIDI file.
def postprocess(txt, path):
    """Parse generated note rows and write them to a MIDI file at ``path``.

    Args:
        txt: Either the generated text, or a list whose first item is a dict
            with a ``'generated_text'`` key (the raw pipeline result).
        path: Destination passed to ``score.dump_midi``.

    If the header ``pitch duration wait velocity instrument`` occurs in the
    text, only the segment between its first occurrence and the next one (or
    the end of the text) is parsed. Each data row must have at least five
    whitespace-separated fields: ``pitch duration wait velocity instrument``.
    ``duration`` and ``wait`` are treated as milliseconds (they are divided
    by 1000 before being stored on second-based notes).

    Rows are skipped when they are blank, have fewer than five fields, do not
    start with a digit string, contain non-integer numeric fields, or fall
    outside the accepted ranges (pitch 21..108, duration 1..5000,
    wait 0..5000, velocity 1..127). A skipped row does not advance the clock.

    Returns:
        None. Progress, the save location and the total duration are printed.
        Errors while saving are printed, not raised.
    """
    if isinstance(txt, list):
        txt = txt[0]['generated_text']

    # Extract the MIDI data section that follows the column header.
    header = 'pitch duration wait velocity instrument'
    if header in txt:
        # The header is present, so split() always yields at least two parts.
        txt = txt.split(header)[1].strip()

    tracks = {}      # instrument field (str) -> track object
    now = 0          # onset of the next note, in milliseconds
    end_ms = 0       # latest note end seen so far, in milliseconds
    note_count = 0

    lines = txt.split('\n')
    print(f"📄 开始解析 {len(lines)} 行数据...")

    for i, line in enumerate(lines):
        parts = line.split()
        # Data rows have at least five fields and start with a digit string;
        # blank lines produce an empty list and are dropped here as well.
        if len(parts) < 5 or not parts[0].isdigit():
            continue

        try:
            pitch, duration, wait, velocity = (int(p) for p in parts[:4])
        except ValueError:
            # Malformed numeric field: ignore the row.
            continue
        instrument = parts[4]

        # Data validation.
        if not (21 <= pitch <= 108):  # plausible pitch range
            continue
        if not (0 < duration <= 5000):
            continue
        if not (0 <= wait <= 5000):
            continue
        if not (1 <= velocity <= 127):
            continue

        try:
            if instrument not in tracks:
                tracks[instrument] = symusic.core.TrackSecond()
                if instrument == 'drum':
                    tracks[instrument].is_drum = True
                else:
                    try:
                        tracks[instrument].program = int(instrument)
                    except Exception:
                        # Best effort: fall back to program 0 (piano, per the
                        # original author's comment) when the instrument
                        # field is not usable as a program number.
                        tracks[instrument].program = 0

            tracks[instrument].notes.append(symusic.core.NoteSecond(
                time=now / 1000,
                duration=duration / 1000,
                pitch=pitch,
                # Velocity is scaled by 4 and capped at the MIDI maximum.
                velocity=min(velocity * 4, 127),
            ))
        except Exception as e:
            print(f"⚠️ 行 {i+1} 解析错误: {e}")
            continue

        end_ms = max(end_ms, now + duration)
        now += wait
        note_count += 1

    print(f"✅ 成功解析 {note_count} 个有效音符")

    if note_count == 0:
        print("❌ 没有解析到任何有效音符")
        return

    try:
        score = symusic.Score(ttype='Second')
        score.tracks.extend(tracks.values())

        # Add musical metadata.
        score.tempos.append(symusic.core.TempoSecond(time=0, qpm=120))
        score.time_signatures.append(symusic.core.TimeSignatureSecond(time=0, num=4, den=4))

        score.dump_midi(path)
        print(f'💾 MIDI文件已保存: {path}')

        # Total length: the latest note end, converted ms -> s -> min.
        # (The earlier version multiplied seconds by 1000 before dividing by
        # 60, which inflated the reported minutes by a factor of 1000.)
        minutes = end_ms / 1000 / 60
        print(f"⏱️  音乐时长: {minutes:.2f} 分钟")

    except Exception as e:
        print('❌ 保存错误:', e)

# Save the result: convert the generated text to a MIDI file.
postprocess(generated_text, './complete_music.mid')

print("🎉 生成完成!")

'''

Hello!

In fact, in the training data, the "music title" text seen by this model is always a single line without any line breaks ('\n'). The next line is always blank, followed by the `pitch duration wait velocity instrument` header line.

I think that if you use a multi-line music title, it might confuse the model, resulting in lower-quality music, and the model might output the <end_of_sequence> token prematurely.

Sorry for not mentioning this in the README. This model is indeed not so flexible to use at the moment!

Sign up or log in to comment