Only about 30 notes can be generated

by tatookan - opened Sep 19, 2025

Sep 19, 2025

I tried many scripts with AI, but all of them could only generate around 30 notes

'''
import torch
from transformers import pipeline
import symusic

# Load the dx2102/llama-midi text-generation pipeline, on GPU when available.
print("🚀 正在加载 dx2102/llama-midi 模型...")

pipe = pipeline(
    "text-generation",
    model="dx2102/llama-midi",
    torch_dtype=torch.bfloat16,
    device="cuda" if torch.cuda.is_available() else "cpu",
)

print("✅ 模型加载完成！")

# Use a forceful prompt to try to generate a long piece in a single pass.
# NOTE(review): according to the model owner, the title seen in training is
# always ONE line, followed by a blank line and then the
# "pitch duration wait velocity instrument" header. This multi-line title may
# confuse the model and make it emit the end-of-sequence token early --
# consider collapsing the title to a single line.
powerful_prompt = '''Bach style complete Baroque fugue with extensive development
Generate a full musical composition with 500+ notes
Continue with proper counterpoint and harmonic progression
Do not stop early - complete the entire musical piece

pitch duration wait velocity instrument
62 480 0 72 0
65 480 480 72 0
69 960 960 72 0
71 480 0 72 0
74 480 480 72 0
77 960 960 72 0
'''

print("🎹 正在一次性生成完整音乐...")

# Generate a large amount of content in one call.
result = pipe(
    powerful_prompt.strip(),
    max_length=4000,  # greatly increase the generation length
    temperature=0.9,
    top_p=0.95,
    do_sample=True,
    repetition_penalty=1.1,
    num_return_sequences=1,
)

generated_text = result[0]['generated_text']
print(f"📊 生成文本长度: {len(generated_text)} 字符")

# Show a preview: the first 15 lines whose first token is numeric.
print("\n📋 生成预览:")
lines = generated_text.split('\n')
data_lines = [line for line in lines if line.strip() and line.split()[0].isdigit()]
for i, line in enumerate(data_lines[:15]):
    print(f" {i+1:2d}: {line}")
if len(data_lines) > 15:
    print(f" ... (还有 {len(data_lines)-15} 行)")

# Post-processing function

def _program_for(instrument):
    """Return the MIDI program for an instrument token, defaulting to 0 (piano).

    Tokens that are not integers, or that fall outside 0..127, map to 0.
    """
    try:
        program = int(instrument)
    except ValueError:
        return 0
    return program if 0 <= program <= 127 else 0


def _iter_valid_notes(lines):
    """Yield ``(line_no, pitch, duration, wait, velocity, instrument)`` rows.

    ``lines`` is an iterable of text lines; ``line_no`` is 1-based. A row is
    yielded only when it has at least five whitespace-separated columns, the
    first column is all digits, the first four columns parse as integers, and
    the values pass the range checks below. Everything else (titles, repeated
    headers, truncated or malformed model output) is skipped silently, which
    is deliberate: the text comes from a language model and is best-effort.
    """
    for line_no, raw in enumerate(lines, start=1):
        fields = raw.split()
        if len(fields) < 5 or not fields[0].isdigit():
            continue
        *numbers, instrument = fields[:5]
        try:
            pitch, duration, wait, velocity = (int(n) for n in numbers)
        except ValueError:
            # e.g. a non-numeric duration/wait/velocity column
            continue

        if not 21 <= pitch <= 108:  # plausible pitch range
            continue
        if duration <= 0 or duration > 5000:
            continue
        if wait < 0 or wait > 5000:
            continue
        if velocity < 1 or velocity > 127:
            continue

        yield line_no, pitch, duration, wait, velocity, instrument


def postprocess(txt, path):
    """Parse generated note text and write it to ``path`` as a MIDI file.

    Args:
        txt: Either the generated text, or the list returned by the
            text-generation pipeline (its first item's ``'generated_text'``
            is used; an empty list is treated as empty text).
        path: Destination path handed to ``score.dump_midi``.

    Returns:
        None. Progress and errors are reported with ``print``. When no valid
        note row is found, nothing is written.

    Each valid row is ``pitch duration wait velocity instrument``. Duration
    and the running start time are divided by 1000 before being stored in a
    seconds-based score; ``wait`` advances the start time of the next note.
    Velocity is multiplied by 4 and capped at 127. The instrument token
    ``drum`` creates a drum track; any other token selects a program number.
    """
    header = 'pitch duration wait velocity instrument'

    if isinstance(txt, list):
        txt = txt[0]['generated_text'] if txt else ''

    # Keep everything after the FIRST header. Splitting on every header would
    # discard notes that follow a repeated header; repeated header lines are
    # simply skipped by the row filter.
    _, found, after = txt.partition(header)
    if found:
        txt = after.strip()

    lines = txt.split('\n')
    print(f"📄 开始解析 {len(lines)} 行数据...")

    tracks = {}
    now = 0        # start time of the next note, in the text's time unit
    end_ms = 0     # latest note end seen so far, same unit as ``now``
    note_count = 0

    for line_no, pitch, duration, wait, velocity, instrument in _iter_valid_notes(lines):
        try:
            track = tracks.get(instrument)
            if track is None:
                track = symusic.core.TrackSecond()
                if instrument == 'drum':
                    track.is_drum = True
                else:
                    track.program = _program_for(instrument)
                tracks[instrument] = track

            track.notes.append(symusic.core.NoteSecond(
                time=now/1000,
                duration=duration/1000,
                pitch=pitch,
                velocity=min(velocity * 4, 127),
            ))
        except Exception as e:
            # Best-effort: report the offending row and keep going.
            print(f"⚠️ 行 {line_no} 解析错误: {e}")
            continue

        end_ms = max(end_ms, now + duration)
        now += wait
        note_count += 1

    print(f"✅ 成功解析 {note_count} 个有效音符")

    if note_count == 0:
        print("❌ 没有解析到任何有效音符")
        return

    try:
        score = symusic.Score(ttype='Second')
        score.tracks.extend(tracks.values())

        # Add basic musical metadata.
        score.tempos.append(symusic.core.TempoSecond(time=0, qpm=120))
        score.time_signatures.append(symusic.core.TimeSignatureSecond(time=0, num=4, den=4))

        score.dump_midi(path)
        print(f'💾 MIDI文件已保存: {path}')

        # Piece length: the latest note end, converted from thousandths of a
        # second to minutes.
        minutes = end_ms / 1000 / 60
        print(f"⏱️  音乐时长: {minutes:.2f} 分钟")

    except Exception as e:
        print('❌ 保存错误:', e)

# Save the result as a MIDI file.
postprocess(generated_text, './complete_music.mid')

print("🎉 生成完成！")

'''

dx2102

Owner Sep 23, 2025

•

edited Sep 23, 2025

Hello!

In fact, in the training data, the "music title" text seen by this model is always a single line without any line breaks ('\n'). The next line is always blank, followed by the `pitch duration wait velocity instrument` header line.

I think if you use a multi-line sheet music title, it might confuse the model, resulting in music with lower quality, and the model might prematurely output the <end_of_sequence> token.

Sorry for not mentioning this in the README. This model is indeed not so flexible to use at the moment!

Upload images, audio, and videos by dragging in the text input, pasting, or clicking here.

Tap or paste here to upload images

· Sign up or log in to comment