File size: 2,058 Bytes
1c70d34
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
#!/usr/bin/env python3
"""
Debug script to check data loading paths
"""
import os
import glob

def report_markdown_dir(heading, noun, directory):
    """Print a path report for *directory* and return its ``*.md`` files.

    Args:
        heading: Label used on the first report line (e.g. ``"Song lyrics"``).
        noun: Lower-case name used on the remaining lines (e.g. ``"lyrics"``);
            it is capitalised where it starts a line.
        directory: Directory to inspect; may be relative to the current
            working directory.

    Returns:
        The list of paths matching ``*.md`` directly inside *directory*, or an
        empty list when the path does not exist.
    """
    absolute = os.path.abspath(directory)
    # Evaluate existence once so the printed value and the branch below agree.
    exists = os.path.exists(absolute)
    print(f"\n{heading} directory: {directory}")
    print(f"Absolute {noun} path: {absolute}")
    print(f"{noun.capitalize()} directory exists: {exists}")
    if not exists:
        return []

    # Escape the directory part so glob metacharacters that happen to appear
    # in the path ("[", "]", "*", "?") are matched literally; only "*.md" is
    # meant to be a pattern.
    md_files = glob.glob(os.path.join(glob.escape(absolute), "*.md"))
    print(f"{noun.capitalize()} .md files found: {len(md_files)}")
    if md_files:
        print(f"Sample files: {md_files[:3]}")
    return md_files


def count_lines_with_preview(path):
    """Count the lines of a UTF-8 text file in a single streaming pass.

    The file is iterated line by line instead of being read with
    ``readlines()``, so memory use stays flat even for a large dataset.

    Args:
        path: Path of the text file to read.

    Returns:
        A ``(line_count, first_line)`` tuple. ``first_line`` keeps its
        trailing newline and is ``""`` when the file is empty.

    Raises:
        OSError: If the file cannot be opened.
        UnicodeDecodeError: If the content is not valid UTF-8.
    """
    line_count = 0
    first_line = ""
    with open(path, "r", encoding="utf-8") as handle:
        for line_count, line in enumerate(handle, start=1):
            if line_count == 1:
                first_line = line
    return line_count, first_line


data_dir = "."

print("=== Path Debug ===")
print(f"Current working directory: {os.getcwd()}")
print(f"Data directory: {data_dir}")
print(f"Absolute data directory: {os.path.abspath(data_dir)}")

# NOTE(review): os.path.dirname(".") is "", so with data_dir = "." the
# "parent" printed here is empty and every path derived from it resolves
# against the current working directory, not its parent. Left unchanged on
# the assumption that this mirrors the loader being debugged -- confirm.
parent_dir = os.path.dirname(data_dir)
print(f"Parent directory: {parent_dir}")
print(f"Parent absolute: {os.path.abspath(parent_dir)}")

# Check song lyrics path
lyrics_dir = os.path.join(parent_dir, "Songs Lyrics")
lyrics_files = report_markdown_dir("Song lyrics", "lyrics", lyrics_dir)

# Check knowledge files path
knowledge_dir = os.path.join(parent_dir, "Quillan Knowledge files")
knowledge_files = report_markdown_dir("Knowledge", "knowledge", knowledge_dir)

# Check JSONL file
jsonl_path = os.path.join(data_dir, "Quillan_finetune_full_dataset.jsonl")
jsonl_abs = os.path.abspath(jsonl_path)
jsonl_exists = os.path.exists(jsonl_abs)
print(f"\nJSONL file: {jsonl_path}")
print(f"Absolute JSONL path: {jsonl_abs}")
print(f"JSONL file exists: {jsonl_exists}")

if jsonl_exists:
    jsonl_line_count, jsonl_first_line = count_lines_with_preview(jsonl_abs)
    print(f"JSONL lines: {jsonl_line_count}")
    if jsonl_line_count:
        print(f"First line preview: {jsonl_first_line[:100]}...")

print("\n=== Debug Complete ===")