File size: 22,549 Bytes
4994b71
 
4c377bd
2fb2290
4994b71
9567ff8
4994b71
 
4c377bd
 
 
fc18f00
322a04b
33731b6
53b2b1d
686446c
33731b6
322a04b
4994b71
4c377bd
 
 
 
bbedd10
 
4c377bd
 
 
 
 
 
 
 
 
 
 
 
 
bbedd10
4c377bd
 
 
 
 
bbedd10
 
 
 
 
 
 
4c377bd
 
 
4994b71
322a04b
 
576366e
392cdf6
0152ed4
b8f5c0f
 
 
392cdf6
 
0152ed4
392cdf6
322a04b
392cdf6
576366e
392cdf6
0152ed4
33731b6
576366e
b8f5c0f
1b1c18c
 
33731b6
 
 
4994b71
 
0152ed4
4994b71
 
322a04b
33731b6
 
6911b3d
 
33731b6
 
 
 
1da8f51
33731b6
1da8f51
 
 
33731b6
 
 
 
 
2fb2290
 
 
 
aad53ab
 
 
 
 
 
 
 
 
 
 
 
 
 
80641ea
aad53ab
ac8d6e6
aad53ab
 
 
 
 
686446c
2fb2290
 
 
 
686446c
1b1c18c
 
 
 
 
 
 
 
 
 
2fb2290
4994b71
322a04b
b2fcbcc
2fb2290
0152ed4
a24c09f
2fb2290
 
 
 
 
a63d501
686446c
 
 
 
 
0152ed4
686446c
 
 
 
 
 
aede054
686446c
0152ed4
b2fcbcc
 
0152ed4
686446c
 
b2fcbcc
0152ed4
b2fcbcc
0152ed4
b2fcbcc
0152ed4
b2fcbcc
 
 
 
0152ed4
b2fcbcc
 
 
 
 
0152ed4
a63d501
 
 
686446c
a63d501
 
aede054
 
0152ed4
322a04b
b2fcbcc
 
322a04b
b2fcbcc
 
0152ed4
b2fcbcc
 
 
0152ed4
2969f34
 
322a04b
442b10a
8b9b46c
 
442b10a
8b9b46c
442b10a
 
 
 
 
 
 
8b9b46c
 
4994b71
 
bbedd10
 
4c377bd
 
bbedd10
 
4c377bd
4994b71
322a04b
4994b71
 
 
 
 
322a04b
4994b71
4c377bd
 
 
 
 
 
 
 
 
 
 
df44b90
 
 
 
 
 
 
4c377bd
 
 
 
 
 
 
dad8eb4
4c377bd
 
 
 
ac7664e
 
 
 
 
 
 
 
62b2ea5
 
 
 
ac7664e
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
21e295f
 
 
 
 
 
 
15536f8
 
f4e8551
a86fb4d
 
 
 
 
f4e8551
 
 
 
 
d0c7a11
 
 
f4e8551
d0c7a11
df44b90
bd2faef
 
 
df44b90
bd2faef
4c377bd
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
322a04b
4994b71
 
 
 
322a04b
e2b66bb
4994b71
4c377bd
 
 
 
60bea6d
 
4994b71
322a04b
4994b71
 
 
 
 
 
 
 
 
 
 
322a04b
ff82603
33731b6
6911b3d
 
ff82603
33731b6
 
ff82603
6911b3d
 
 
 
 
 
33731b6
 
 
 
6911b3d
 
 
 
ff82603
33731b6
ff82603
 
4994b71
 
 
 
 
 
 
 
 
 
 
322a04b
4994b71
 
322a04b
4994b71
 
322a04b
4994b71
5909095
 
0152ed4
5909095
 
a998f2d
5909095
 
 
e10d2ca
5909095
f229c91
5909095
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
322a04b
a998f2d
 
 
 
 
 
 
 
 
 
 
 
 
9567ff8
322a04b
4994b71
322a04b
4994b71
5909095
4994b71
686446c
 
 
 
 
 
 
 
 
 
 
 
 
 
 
0cc0152
686446c
0cc0152
686446c
0cc0152
686446c
0cc0152
686446c
0cc0152
686446c
 
 
 
 
 
 
6911b3d
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
from typing import Dict, List, Optional
from pathlib import Path
from collections import defaultdict
import shutil
from langchain_openai import OpenAIEmbeddings, ChatOpenAI
from langchain_chroma import Chroma
from langchain.chains import ConversationalRetrievalChain
from langchain.prompts import PromptTemplate
from langchain_core.callbacks import CallbackManagerForRetrieverRun
from langchain_core.documents import Document
from langchain_core.retrievers import BaseRetriever
from huggingface_hub import snapshot_download, hf_hub_download, HfApi
from config.settings import Settings
from tenacity import retry, stop_after_attempt, wait_exponential, retry_if_exception_type
from datasets import load_dataset
import sqlite3
from openai import APIConnectionError, RateLimitError


class DiverseRetriever(BaseRetriever):
    """Retriever that limits how many chunks a single artist may contribute.

    It over-fetches ``fetch_k`` scored hits from the vector store, walks them
    in the order returned, keeps at most ``max_per_artist`` chunks per
    ``artist`` metadata value, and stops once ``final_k`` chunks are kept.
    """

    vector_store: Chroma
    fetch_k: int = 200
    max_per_artist: int = 2
    final_k: int = 20

    class Config:
        arbitrary_types_allowed = True

    def _get_relevant_documents(
        self, query: str, *, run_manager: CallbackManagerForRetrieverRun
    ) -> List[Document]:
        """Return up to ``final_k`` documents with the per-artist cap applied."""
        hits = self.vector_store.similarity_search_with_score(
            query, k=self.fetch_k
        )

        kept: List[Document] = []
        kept_per_artist: dict = defaultdict(int)
        dropped_per_artist: dict = defaultdict(int)

        for document, _ in hits:
            # Documents without an "artist" key share one "unknown" bucket.
            artist_name = document.metadata.get("artist", "unknown")
            if kept_per_artist[artist_name] >= self.max_per_artist:
                # Artist already at its cap: record the skip and move on.
                dropped_per_artist[artist_name] += 1
                continue
            kept_per_artist[artist_name] += 1
            kept.append(document)
            if len(kept) >= self.final_k:
                break

        distinct_artists = {document.metadata.get("artist") for document in kept}
        print(f"DiverseRetriever: {len(kept)} chunks from {len(distinct_artists)} artists "
              f"(scanned {len(hits)}, skipped: {dict(dropped_per_artist)})")
        return kept


class LyricGenerator:
    def __init__(self):
        """Resolve storage paths, create the embeddings client and load the store."""
        print("Initializing LyricGenerator...")
        print(f"Deployment mode: {Settings.DEPLOYMENT_MODE}")

        # Diagnostic hook on Settings for the OpenAI API key
        Settings.debug_openai_key()

        # Let Settings prepare the embedding paths before they are read
        Settings.ensure_embedding_paths()

        # Resolve the storage locations, then report them
        embeddings_path = Settings.get_embeddings_path()
        chroma_path = Settings.get_chroma_path()
        self.embeddings_dir = embeddings_path
        self.chroma_dir = chroma_path
        print(f"Embeddings directory: {self.embeddings_dir}")
        print(f"Chroma directory: {self.chroma_dir}")

        # The embeddings client cannot be built without an API key
        print("Setting up OpenAI embeddings...")
        key_is_missing = not Settings.OPENAI_API_KEY
        if key_is_missing:
            raise RuntimeError(
                "OpenAI API key is not set. Please configure it in your "
                "environment variables or HuggingFace Secrets."
            )

        self.embeddings = self._create_embeddings_with_retry()

        # Both are filled in while the embeddings are loaded below
        self.vector_store = None
        self.qa_chain = None

        self._load_embeddings()

    @retry(
        retry=retry_if_exception_type((APIConnectionError, RateLimitError)),
        wait=wait_exponential(multiplier=2, min=4, max=60),
        stop=stop_after_attempt(10)
    )
    def _create_embeddings_with_retry(self):
        """Build the OpenAIEmbeddings client from the configured API key.

        The ``@retry`` decorator re-runs this method (up to 10 attempts with
        exponential backoff) when an APIConnectionError or RateLimitError
        escapes it. Every exception is logged here and then re-raised.
        """
        try:
            # Surrounding whitespace is stripped from the key before use
            cleaned_key = Settings.OPENAI_API_KEY.strip()
            client_options = {
                "openai_api_key": cleaned_key,
                "timeout": 60,
                "openai_proxy": None,
            }
            return OpenAIEmbeddings(**client_options)
        except Exception as exc:
            print(f"Error creating embeddings: {type(exc).__name__}: {str(exc)}")
            raise

    def _setup_embeddings_from_hf(self) -> None:
        """Download the HuggingFace dataset snapshot and point ``chroma_dir`` at it.

        The snapshot of ``Settings.HF_DATASET`` is fetched into the ``/data``
        cache directory, and ``self.chroma_dir`` is set to the ``chroma``
        sub-directory of that snapshot.

        Raises:
            RuntimeError: if the download fails or the snapshot contains no
                ``chroma/`` directory. The underlying exception is chained as
                ``__cause__`` so the original traceback is preserved.
        """
        print("\n=== Setting up embeddings from HuggingFace dataset ===")
        try:
            # NOTE(review): no force_download flag is passed, so this call does
            # not by itself force a re-download of cached files. Confirm whether
            # a forced refresh of the HNSW index is actually required.
            print("Downloading latest dataset snapshot...")
            snapshot_path = snapshot_download(
                repo_id=Settings.HF_DATASET,
                repo_type="dataset",
                token=Settings.HF_TOKEN,
                cache_dir="/data",
            )
            chroma_path = Path(snapshot_path) / "chroma"
            print(f"Downloaded snapshot to: {snapshot_path}")

            if not chroma_path.exists():
                raise RuntimeError(f"chroma/ not found in snapshot at {chroma_path}")

            # Set the chroma directory
            self.chroma_dir = chroma_path
            print(f"Chroma directory set to: {self.chroma_dir}")

            # Log every file under chroma/ with its size, for debugging
            for f in sorted(chroma_path.rglob("*")):
                if f.is_file():
                    print(f"  {f.name}: {f.stat().st_size / (1024*1024):.1f} MB")

        except Exception as e:
            print("\n=== Error in _setup_embeddings_from_hf ===")
            print(f"Error type: {type(e).__name__}")
            print(f"Error message: {str(e)}")
            # Chain the cause explicitly so the root failure stays attached.
            raise RuntimeError(
                f"Failed to setup embeddings from HuggingFace: {str(e)}"
            ) from e

    def _list_cache_directory(self, cache_dir_path: str) -> None:
        """List the contents of the cache directory"""
        cache_dir = Path(cache_dir_path)
        if cache_dir.exists():
            print(f"Contents of {cache_dir_path} directory:")
            for item in cache_dir.iterdir():
                print(f"- {item.name}")
        else:
            print(f"{cache_dir_path} directory does not exist.")

    def _load_embeddings(self) -> None:
        """Open the Chroma vector store for this environment, then build the QA chain.

        On HuggingFace the dataset snapshot is downloaded first. Locally, the
        Chroma directory and its ``chroma.sqlite3`` file must already exist.
        The store is then opened with the values returned by
        ``Settings.get_chroma_settings()`` and verified to be non-empty.

        Raises:
            RuntimeError: if any step fails or the collection holds no
                documents. The original exception is chained as ``__cause__``.
        """
        try:
            print("\n=== Loading Embeddings ===")

            # Determine the environment and set paths accordingly
            if Settings.is_huggingface():
                print("HuggingFace environment detected, setting up embeddings...")
                self._setup_embeddings_from_hf()
            else:
                print("Local environment detected")
                print(f"Base directory: {Settings.BASE_DIR}")

                # Verify local paths
                if not self.chroma_dir.exists():
                    raise RuntimeError(
                        f"Chroma directory not found at {self.chroma_dir}")

                sqlite_file = self.chroma_dir / "chroma.sqlite3"
                print(f"Checking SQLite file: {sqlite_file}")
                if not sqlite_file.exists():
                    print(f"Directory contents: {list(self.chroma_dir.glob('**/*'))}")
                    raise RuntimeError(
                        f"Chroma database not found at {sqlite_file}")
                print(
                    f"SQLite file size: {sqlite_file.stat().st_size / (1024*1024):.2f} MB")

            # Load vector store using environment-aware settings.
            # NOTE(review): the persist directory comes from Settings, not from
            # self.chroma_dir (which _setup_embeddings_from_hf re-points at the
            # downloaded snapshot) - confirm both refer to the same location.
            print("Initializing Chroma with settings:")
            chroma_settings = Settings.get_chroma_settings()
            print(f"Using persist directory: {chroma_settings['persist_directory']}")

            self.vector_store = Chroma(
                persist_directory=chroma_settings["persist_directory"],
                embedding_function=self.embeddings,
                collection_name=chroma_settings["collection_name"]
            )

            # Verify collection has documents
            collection = self.vector_store._collection
            count = collection.count()
            print(f"Collection contains {count} documents")

            if count == 0:
                print("Collection is empty, checking details...")
                # Try to peek at the collection data
                peek = collection.peek()
                print(f"Collection peek: {peek}")

                # Dump the on-disk layout to help diagnose the empty collection
                print("\nDebug Information:")
                print("Chroma directory structure:")
                for item in self.chroma_dir.glob('**/*'):
                    print(f"  {item}")
                    if item.is_file():
                        print(
                            f"    Size: {item.stat().st_size / (1024*1024):.2f} MB")

                raise RuntimeError(
                    "Chroma DB is empty. Please ensure embeddings "
                    "were properly generated and uploaded."
                )
            else:
                print("Successfully loaded embeddings")

        except Exception as e:
            print(f"Error loading embeddings: {str(e)}")
            # Chain the cause explicitly so the root failure stays attached.
            raise RuntimeError(f"Failed to load embeddings: {str(e)}") from e

        # Setup QA chain (only reached when the store loaded successfully)
        self._setup_qa_chain()

    def _find_chroma_directory(self, base_path: str) -> Optional[Path]:
        """Find the Chroma directory within the base path"""
        base_dir = Path(base_path)
        print(f"Searching for Chroma directory in: {base_dir}")
        for subdir in base_dir.iterdir():
            print(f"Checking subdir: {subdir}")
            if subdir.is_dir():
                print(f"Subdir contents: {list(subdir.iterdir())}")
                if (subdir / "chroma.sqlite3").exists():
                    print(f"Chroma directory found: {subdir}")
                    return subdir
        print("Chroma directory not found.")
        return None

    def _setup_qa_chain(self) -> None:
        """Build the conversational retrieval chain and store it on ``self.qa_chain``.

        Wires together a DiverseRetriever over ``self.vector_store``, the
        songwriting prompt below (placeholders: ``chat_history``, ``context``,
        ``question``) and a ChatOpenAI model configured from
        ``Settings.LLM_MODEL``.
        """
        # Configure diverse retriever: fetch 200, cap 2 per artist, return up to 20.
        # With a cap of 2, a full result of 20 chunks spans at least 10 artists;
        # fewer artists are possible when fewer than 20 chunks pass the cap.
        retriever = DiverseRetriever(
            vector_store=self.vector_store,
            fetch_k=200,
            max_per_artist=2,
            final_k=20,
        )

        # Document prompt: each retrieved document is rendered as its raw
        # page_content only
        document_prompt = PromptTemplate(
            input_variables=["page_content"],
            template="{page_content}"
        )

        # System prompt template; {chat_history}, {context} and {question} at
        # the end are the only placeholders filled in at run time
        system_template = """You are a professional songwriter. Your ONLY output is lyrics with section markers. No analysis. No explanation. No commentary. No source references. Nothing before the lyrics. Nothing after the lyrics.

OUTPUT FORMAT:
[Section Name]
lyrics here

[Next Section]
lyrics here

That is it. Section markers in brackets, lyrics below each one. Nothing else.

STRICT SECTION LIMITS:
- Verses: 8-16 lines maximum.
- Pre-Chorus: 2-4 lines.
- Chorus/Hook: 4-8 lines.
- Bridge: 4-8 lines.
Do not exceed these limits.

BANNED WORDS — never use any of these:
neon, algorithm, digital, phantom, pixel, shadow, reflection, concrete jungle, echo chamber, midnight, cypher, whisper, canvas, tapestry, labyrinth, mosaic, symphony, aurora, ethereal, cosmic, celestial, visceral, transcend, paradigm, ultrapixel, emotional phantom

SPECIFICITY RULES — every line must follow these:
1. SCENES over concepts — put the listener in a specific place with objects they can see
2. OBJECTS over adjectives — name the actual thing (a dented Ford Ranger, not "a broken vehicle")
3. CONSEQUENCES over metaphors — show what happened, not what it was like
4. TEMPORAL GROUNDING — anchor moments in time when it serves the scene, but vary how (a season, a semester, a shift at work, the age you were). Do not default to "day of the week + exact clock time" — that is one option among many.
5. DOMESTIC DETAIL — kitchen tables, screen doors, parking lots, unwashed mugs, not abstract spaces
6. GUT-PUNCH MOMENTS — one line per section that lands like a physical sensation
7. EMOTIONAL SHIFTS — each section should feel different from the last (angry→tender, numb→raw)

CRAFT & STRUCTURE STANDARDS:

General:
- Prioritize economy of language.
- Remove unnecessary adjectives.
- Prefer strong nouns and verbs over descriptive phrasing.
- Avoid lines that read like explanations.
- If a line contains multiple clauses, simplify it.
- All lyrics must read as natural spoken language.
- Avoid vague or invented idioms that do not clearly map to real speech.
- If a phrase sounds poetic but unclear, rewrite it in plainer language.
- Prioritize clarity over cleverness.

Verses:
- Verses may be detailed and scene-driven.
- Allow rhythmic complexity in verses.
- Avoid over-symmetry; slight irregularity is acceptable.

Hooks (Chorus / Refrain):
- Hooks must be cleaner and more compressed than verses.
- Average 4-8 words per line in hooks.
- Prefer 1-2 syllable words when possible.
- Limit metaphors in hooks (maximum 1 central metaphor).
- Emphasize repetition of a core phrase.
- Each hook line should be chantable after one listen.
- Avoid multi-clause sentences in hooks.
- Avoid overly abstract or technical vocabulary in hooks unless it is the main phrase.

Sound & Punch:
- Favor strong consonant sounds and rhythmic phrasing in hooks.
- Hooks should feel physically speakable in one breath.

Singability:
- Favor open vowel sounds (ah, oh, ee, ay) on key words and line endings.
- Avoid consonant clusters that trip the tongue when sung quickly.
- Each line should be speakable in one natural breath.
- End lines on sounds that can be held or resonate (open vowels, m, n, l) rather than hard stops (t, k, p) when possible.
- Read each line as if singing it — if it feels clunky in the mouth, simplify.

Rhyme:
- Every section must have a rhyme scheme (AABB, ABAB, or ABCB). Do NOT label lines with scheme letters in the output.
- Slant rhymes and near rhymes are fine (e.g. "glass" / "fast", "door" / "drawer").
- Never break grammar, clarity, or meaning to force a rhyme. Every line must make literal sense on its own.
- A clear, natural line that near-rhymes is always better than a nonsensical line with a perfect rhyme.
- BAD (forced rhyme, no real meaning): "There's a warm glass of red on the bathroom cut" — "bathroom cut" is not a real thing, it exists only to rhyme with "shut."
- BAD (forced rhyme, no real meaning): "Keeps scrolling rent on another man's land" — "scrolling rent" is not how anyone talks, forced to rhyme with "hand."
- If you catch yourself inventing a phrase that no one would say in conversation, the rhyme is not worth it. Rewrite the whole line.

Rhythm & Line Length:
- Line lengths within a section should follow a repeating pattern, not be random.
- Good: 6, 10, 6, 10, 6, 10 or 8, 8, 8, 8 or 10, 10, 6, 10, 10, 6.
- Bad: 10, 12, 7, 11, 9, 13 (no pattern, feels chaotic).
- Not every line needs a concrete detail. Let some lines breathe.
- Mix dense image lines with short, bare emotional statements.
- Occasionally let a sentence spill across two lines (enjambment).
- Vary sentence structure — if three lines start the same way, change the fourth.

STRUCTURAL FOCUS:
- Focus each verse on 1-2 key moments or images.
- Avoid stacking multiple time jumps or separate scenes in one verse.
- Select the strongest images; remove weaker supporting detail.
- Pre-choruses should increase tension, not restate the verse.

BAD (abstract LLM output):
"In the shadows of my mind I wander through the echoes
Searching for a light that fades like whispers in the wind
The tapestry of memories unravels at the seams
As I transcend the boundaries of what we could have been"

GOOD (specific, lived-detail writing):
"Your coffee mug's still on the counter, Wednesday morning light
I keep stepping over boxes I packed three weeks ago
The landlord needs an answer and my sister needs a ride
But I'm just sitting on the kitchen floor in yesterday's clothes"

The GOOD example works because: specific mug, specific day, specific floor, specific detail about boxes with a time frame, real obligations pulling at the narrator. Every line is a scene you can photograph.

Previous Chat History:
{chat_history}

Reference lyrics — study their rhythm, rhyme schemes, flow, tone, and the kinds of details they use. Draw inspiration from their emotional register and imagery approach, but write original lines. Do not copy phrases directly:
{context}

User Request: {question}"""

        prompt = PromptTemplate(
            input_variables=["context", "chat_history", "question"],
            template=system_template
        )

        # Initialize language model with the sampling settings used for lyric
        # generation; the completion length is capped via max_completion_tokens
        llm = ChatOpenAI(
            temperature=0.95,
            model_name=Settings.LLM_MODEL,
            top_p=0.9,
            presence_penalty=0.25,
            frequency_penalty=0.2,
            model_kwargs={"max_completion_tokens": 2000},
        )

        # Create QA chain; retrieved documents are injected into the prompt's
        # {context} variable, and source documents are returned with the answer
        self.qa_chain = ConversationalRetrievalChain.from_llm(
            llm=llm,
            retriever=retriever,
            return_source_documents=True,
            combine_docs_chain_kwargs={
                "prompt": prompt,
                "document_prompt": document_prompt,
                "document_variable_name": "context"
            }
        )

    @retry(
        retry=retry_if_exception_type((APIConnectionError, RateLimitError)),
        wait=wait_exponential(multiplier=2, min=4, max=60),
        stop=stop_after_attempt(10)
    )
    def _similarity_search_with_retry(self, query: str, k: int = 5):
        """Run a scored similarity search after probing the embeddings API.

        The ``@retry`` decorator re-runs this method (up to 10 attempts with
        exponential backoff) when an APIConnectionError or RateLimitError
        escapes it. All other exceptions are logged and propagate.
        """
        try:
            # Probe the embeddings endpoint with a throwaway query first
            probe_vector = self.embeddings.embed_query("test")
            if not probe_vector:
                raise RuntimeError("Empty response from OpenAI")

            # The probe succeeded, so run the real search
            scored_hits = self.vector_store.similarity_search_with_score(query, k=k)
            return scored_hits
        except APIConnectionError as conn_err:
            print(f"OpenAI API Connection Error: {str(conn_err)}")
            print("Retrying...")
            raise  # re-raised so the retry decorator can try again
        except Exception as other_err:
            print(f"Similarity search error: {type(other_err).__name__}: {str(other_err)}")
            raise

    def generate_lyrics(
        self,
        prompt: str,
        chat_history: Optional[List] = None
    ) -> Dict:
        """Generate lyrics for ``prompt`` using the retrieval chain.

        Args:
            prompt: The user's request; must contain non-whitespace text.
            chat_history: Earlier conversation turns passed through to the
                chain. ``None`` is treated as an empty history.

        Returns:
            The chain's response dict with an extra ``"context_details"`` key:
            a list (at most 10 entries) of ``artist`` / ``song`` / ``content``
            snippets taken from the response's ``source_documents``.

        Raises:
            ValueError: if the QA chain is not initialized, or the prompt is
                ``None``, empty or whitespace only.
            RuntimeError: if the chain call or post-processing fails. The
                underlying exception is chained as ``__cause__``.
        """
        if not self.qa_chain:
            raise ValueError(
                "QA chain not initialized. "
                "Please ensure embeddings are loaded correctly."
            )

        # Reject None as well as blank prompts with the same clear error.
        if not prompt or not prompt.strip():
            raise ValueError("Prompt cannot be empty")

        if chat_history is None:
            chat_history = []

        try:
            print("Starting lyrics generation process...")
            print(f"Using OpenAI model: {Settings.LLM_MODEL}")

            try:
                print("Attempting OpenAI API call...")
                # Generate response using invoke — DiverseRetriever handles retrieval
                response = self.qa_chain.invoke({
                    "question": prompt,
                    "chat_history": chat_history
                })
                print("Successfully generated response from OpenAI")

            except Exception as e:
                # Map common failures to friendlier messages by inspecting the
                # error text; the original exception stays attached as cause.
                error_msg = str(e)
                print(f"OpenAI API error details: {error_msg}")
                if "401" in error_msg:
                    raise RuntimeError(
                        "OpenAI API authentication failed. Please verify the API key."
                    ) from e
                elif "429" in error_msg:
                    raise RuntimeError(
                        "OpenAI API rate limit exceeded. Please try again in a moment."
                    ) from e
                elif "connect" in error_msg.lower():
                    raise RuntimeError(
                        "Connection to OpenAI failed. This might be a temporary issue. "
                        "Please try again."
                    ) from e
                else:
                    raise RuntimeError(f"OpenAI API error: {error_msg}") from e

            # Build context details from the chain's actual source documents
            source_docs = response.get("source_documents", [])
            snippet_limit = 200
            context_details = []
            for doc in source_docs[:10]:
                text = doc.page_content
                # Only mark the snippet as truncated when text was cut off.
                if len(text) > snippet_limit:
                    snippet = text[:snippet_limit] + "..."
                else:
                    snippet = text
                context_details.append({
                    'artist': doc.metadata.get('artist', 'Unknown'),
                    'song': doc.metadata.get('song_title', 'Unknown'),
                    'content': snippet
                })

            unique_artists = len({d['artist'] for d in context_details})
            print(f"Sources shown: {len(context_details)} chunks from {unique_artists} artists")

            response["context_details"] = context_details

            return response

        except Exception as e:
            print(f"Error in generate_lyrics: {str(e)}")
            raise RuntimeError(f"Failed to generate lyrics: {str(e)}") from e

    def _examine_sqlite_db(self, db_path: Path) -> None:
        """Examine the contents of the SQLite database"""
        try:
            print(f"\nExamining SQLite database at: {db_path}")
            conn = sqlite3.connect(db_path)
            cursor = conn.cursor()
            
            # List all tables
            cursor.execute("SELECT name FROM sqlite_master WHERE type='table';")
            tables = cursor.fetchall()
            print("\nTables in database:")
            for table in tables:
                print(f"- {table[0]}")
            
            # Get collection info - updated query for newer ChromaDB schema
            print("\nCollections:")
            cursor.execute("SELECT name, id FROM collections;")
            collections = cursor.fetchall()
            for name, collection_id in collections:
                print(f"- Name: {name}")
                print(f"  ID: {collection_id}")
                # Get count of embeddings
                cursor.execute("SELECT COUNT(*) FROM embeddings WHERE collection_id = ?", (collection_id,))
                count = cursor.fetchone()[0]
                print(f"  Embeddings count: {count}")
            
            conn.close()
            
        except Exception as e:
            print(f"Warning: Could not fully examine SQLite database: {e}")

    def _verify_openai_connection(self):
        """Verify OpenAI API connection"""
        try:
            print("Verifying OpenAI API connection...")
            test_embedding = self.embeddings.embed_query("test")
            if test_embedding:
                print("OpenAI API connection verified")
                return True
        except Exception as e:
            print(f"OpenAI API connection test failed: {type(e).__name__}: {str(e)}")
            return False