James-Edmunds committed on
Commit
4c377bd
·
verified ·
1 Parent(s): d305e69

Upload folder using huggingface_hub

Browse files
Files changed (3) hide show
  1. app.py +12 -2
  2. config/settings.py +1 -1
  3. src/generator/generator.py +91 -79
app.py CHANGED
@@ -10,10 +10,19 @@ if os.getenv('DEPLOYMENT_MODE') == 'huggingface':
10
  else:
11
  os.environ['DYLD_LIBRARY_PATH'] = '/usr/local/opt/sqlite/lib'
12
 
 
 
13
  from src.generator.generator import LyricGenerator
14
  from config.settings import Settings
15
 
16
 
 
 
 
 
 
 
 
17
  def main():
18
  """Main application function"""
19
  st.set_page_config(
@@ -23,6 +32,7 @@ def main():
23
  )
24
 
25
  st.title("SongLift LyrGen2")
 
26
 
27
  # Only run startup once per session
28
  if 'initialized' not in st.session_state:
@@ -57,7 +67,7 @@ def main():
57
  with st.chat_message("user"):
58
  st.write(user_msg)
59
  with st.chat_message("assistant"):
60
- st.markdown(f"```\n{assistant_msg}\n```")
61
 
62
  # Chat interface
63
  user_input = st.chat_input("Enter your prompt (ask for new lyrics or modify existing ones)...")
@@ -76,7 +86,7 @@ def main():
76
 
77
  # Store the response
78
  lyrics = response['answer']
79
- st.markdown(f"```\n{lyrics}\n```")
80
  st.session_state.current_lyrics = lyrics
81
 
82
  # Display sources with content
 
10
  else:
11
  os.environ['DYLD_LIBRARY_PATH'] = '/usr/local/opt/sqlite/lib'
12
 
13
+ import re
14
+
15
  from src.generator.generator import LyricGenerator
16
  from config.settings import Settings
17
 
18
 
19
def format_lyrics(text: str) -> str:
    """Format generated lyrics for display as Markdown.

    Bolds section markers such as ``[Verse 1]`` or ``[Chorus]``, trims
    surrounding and trailing whitespace, and keeps each lyric line on its
    own rendered line.

    Args:
        text: Raw lyrics. ``None`` or an empty string yields ``""``.

    Returns:
        Markdown text. Applying the function to its own output returns the
        same string, so re-rendering stored text never stacks ``**`` pairs.
    """
    if not text:
        return ""

    # Lookarounds skip markers that are already bold and Markdown links
    # ("[label](url)"); excluding "\n" keeps a stray "[" from pairing with a
    # "]" several lines later and producing bold markup that spans lines.
    bolded = re.sub(
        r'(?<!\*\*)\[([^\]\n]+)\](?!\*\*|\()',
        r'**[\1]**',
        text,
    )

    lines = [line.rstrip() for line in bolded.strip().splitlines()]

    # Markdown treats a single newline as a soft break, which would merge
    # consecutive lyric lines into one paragraph. Two trailing spaces force
    # a hard break; blank lines (section gaps) are left untouched.
    formatted = []
    for index, line in enumerate(lines):
        next_line = lines[index + 1] if index + 1 < len(lines) else ""
        formatted.append(f"{line}  " if line and next_line else line)
    return "\n".join(formatted)
24
+
25
+
26
  def main():
27
  """Main application function"""
28
  st.set_page_config(
 
32
  )
33
 
34
  st.title("SongLift LyrGen2")
35
+ st.sidebar.markdown(f"**Model:** {Settings.LLM_MODEL}")
36
 
37
  # Only run startup once per session
38
  if 'initialized' not in st.session_state:
 
67
  with st.chat_message("user"):
68
  st.write(user_msg)
69
  with st.chat_message("assistant"):
70
+ st.markdown(format_lyrics(assistant_msg))
71
 
72
  # Chat interface
73
  user_input = st.chat_input("Enter your prompt (ask for new lyrics or modify existing ones)...")
 
86
 
87
  # Store the response
88
  lyrics = response['answer']
89
+ st.markdown(format_lyrics(lyrics))
90
  st.session_state.current_lyrics = lyrics
91
 
92
  # Display sources with content
config/settings.py CHANGED
@@ -26,7 +26,7 @@ class Settings:
26
 
27
  # Model Settings
28
  EMBEDDING_MODEL = "text-embedding-ada-002"
29
- LLM_MODEL = "gpt-4"
30
 
31
  # ChromaDB Settings
32
  CHROMA_COLLECTION_NAME = "lyrics_v1"
 
26
 
27
  # Model Settings
28
  EMBEDDING_MODEL = "text-embedding-ada-002"
29
+ LLM_MODEL = os.getenv("LLM_MODEL", "gpt-5.2")
30
 
31
  # ChromaDB Settings
32
  CHROMA_COLLECTION_NAME = "lyrics_v1"
src/generator/generator.py CHANGED
@@ -1,10 +1,14 @@
1
  from typing import Dict, List, Optional
2
  from pathlib import Path
 
3
  import shutil
4
  from langchain_openai import OpenAIEmbeddings, ChatOpenAI
5
  from langchain_chroma import Chroma
6
  from langchain.chains import ConversationalRetrievalChain
7
  from langchain.prompts import PromptTemplate
 
 
 
8
  from huggingface_hub import snapshot_download, hf_hub_download, HfApi
9
  from config.settings import Settings
10
  from tenacity import retry, stop_after_attempt, wait_exponential, retry_if_exception_type
@@ -13,6 +17,37 @@ import sqlite3
13
  from openai import APIConnectionError, RateLimitError
14
 
15
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
16
  class LyricGenerator:
17
  def __init__(self):
18
  """Initialize the generator with embeddings"""
@@ -189,9 +224,12 @@ class LyricGenerator:
189
 
190
  def _setup_qa_chain(self) -> None:
191
  """Initialize the QA chain for generating lyrics"""
192
- # Configure retriever with a more reasonable number of documents
193
- retriever = self.vector_store.as_retriever(
194
- search_kwargs={"k": 20} # Reduced from 100 to 20
 
 
 
195
  )
196
 
197
  # Create document prompt
@@ -201,76 +239,50 @@ class LyricGenerator:
201
  )
202
 
203
  # System prompt template
204
- system_template = """You are an expert lyricist who excels at analyzing and
205
- adapting patterns from existing lyrics while maintaining professional
206
- songwriting standards.
207
-
208
- CONTEXT UTILIZATION:
209
- 1. Analysis of Retrieved Examples:
210
- - Study rhyme patterns, flow structures, and wordplay
211
- - Identify unique stylistic elements matching the requested genre
212
- - Extract metaphors and imagery that fit the context
213
- - Note how similar themes are handled in different styles
214
-
215
- 2. Technical Song Construction:
216
- - Mathematical precision in section lengths (8, 16, 32 bars)
217
- - Strategic placement of hooks and payoff lines
218
- - Clear sectional contrasts (verse/pre-chorus/chorus dynamics)
219
- - Melodic phrasing considerations
220
- - Build tension and release patterns
221
-
222
- 3. Genre-Specific Excellence:
223
- Pop Structure:
224
- - Compact, focused verses (8-16 bars)
225
- - Pre-chorus build-up (4-8 bars)
226
- - Strong chorus payoff (8 bars)
227
- - Bridge contrasts (8 bars)
228
- - Strategic repetition
229
- - Emphasis on memorable, repeatable phrases
230
-
231
- Hip-Hop Elements:
232
- - Complex rhyme schemes (multisyllabic, internal)
233
- - Advanced wordplay and metaphors
234
- - Flow patterns matching contemporary cadences
235
- - Authentic voice and modern language
236
- - Hooks that balance complexity with accessibility
237
-
238
- GENERATION APPROACH:
239
- 1. First, analyze retrieved lyrics for:
240
- - Successful patterns and techniques
241
- - Thematic handling
242
- - Style-specific elements
243
- - Unique devices worth adapting
244
-
245
- 2. Then craft content that:
246
- - Adapts identified patterns thoughtfully
247
- - Maintains professional structure
248
- - Blends inspiration with innovation
249
- - Stays current and authentic
250
- - Avoids dated or overdone tropes
251
-
252
- 3. Always ensure:
253
- - Technical excellence in rhythm and flow
254
- - Proper section marking [Verse/Chorus/etc]
255
- - Emotional resonance and authenticity
256
- - Modern, fresh perspective
257
- - Strategic use of space and silence
258
-
259
- Previous Chat History:
260
- {chat_history}
261
-
262
- Retrieved Examples for Analysis:
263
- {context}
264
-
265
- Response Format:
266
- 1. Brief analysis of relevant patterns from retrieved examples
267
- 2. Generated lyrics with clear section marking
268
- 3. Explanation of how you adapted specific elements from the sources
269
-
270
- User Request: {question}
271
-
272
- Response: Let me analyze the retrieved lyrics and craft something that
273
- combines their strongest elements with professional songwriting principles."""
274
 
275
  prompt = PromptTemplate(
276
  input_variables=["context", "chat_history", "question"],
@@ -279,12 +291,12 @@ class LyricGenerator:
279
 
280
  # Initialize language model
281
  llm = ChatOpenAI(
282
- temperature=0.9,
283
- model_name="gpt-4",
284
- max_tokens=1000,
285
- top_p=0.95,
286
- presence_penalty=0.0,
287
- frequency_penalty=0.1
288
  )
289
 
290
  # Create QA chain
 
1
  from typing import Dict, List, Optional
2
  from pathlib import Path
3
+ from collections import defaultdict
4
  import shutil
5
  from langchain_openai import OpenAIEmbeddings, ChatOpenAI
6
  from langchain_chroma import Chroma
7
  from langchain.chains import ConversationalRetrievalChain
8
  from langchain.prompts import PromptTemplate
9
+ from langchain_core.callbacks import CallbackManagerForRetrieverRun
10
+ from langchain_core.documents import Document
11
+ from langchain_core.retrievers import BaseRetriever
12
  from huggingface_hub import snapshot_download, hf_hub_download, HfApi
13
  from config.settings import Settings
14
  from tenacity import retry, stop_after_attempt, wait_exponential, retry_if_exception_type
 
17
  from openai import APIConnectionError, RateLimitError
18
 
19
 
20
class DiverseRetriever(BaseRetriever):
    """Retriever that limits how many chunks any single artist contributes.

    It asks the vector store for ``fetch_k`` candidates, walks them in the
    order the store returned them, and keeps a chunk only while its artist
    is still under ``max_per_artist``. The walk stops once ``final_k``
    chunks have been kept.
    """

    # Store queried through ``similarity_search_with_score``.
    vector_store: Chroma
    # Number of candidates requested from the store before filtering.
    fetch_k: int = 60
    # Maximum number of kept chunks sharing one ``artist`` metadata value.
    max_per_artist: int = 3
    # The walk over candidates stops once this many chunks are kept.
    final_k: int = 20

    class Config:
        # ``Chroma`` is not a pydantic model, so the field needs this opt-in.
        arbitrary_types_allowed = True

    def _get_relevant_documents(
        self, query: str, *, run_manager: CallbackManagerForRetrieverRun
    ) -> List[Document]:
        """Return per-artist-capped documents for ``query``.

        Args:
            query: Text passed to the vector store's similarity search.
            run_manager: Callback manager supplied by the caller; not used
                in this method.

        Returns:
            The kept documents, in the order the store returned them.
            Scores from the search are discarded.
        """
        candidates = self.vector_store.similarity_search_with_score(
            query, k=self.fetch_k
        )

        per_artist: dict = defaultdict(int)
        kept: List[Document] = []
        for document, _ in candidates:
            # Chunks without an "artist" key all share the "unknown" bucket.
            artist_name = document.metadata.get("artist", "unknown")
            if per_artist[artist_name] >= self.max_per_artist:
                continue
            per_artist[artist_name] += 1
            kept.append(document)
            if len(kept) >= self.final_k:
                break

        print(f"DiverseRetriever: {len(kept)} chunks from {len(per_artist)} artists")
        return kept
49
+
50
+
51
  class LyricGenerator:
52
  def __init__(self):
53
  """Initialize the generator with embeddings"""
 
224
 
225
  def _setup_qa_chain(self) -> None:
226
  """Initialize the QA chain for generating lyrics"""
227
+ # Configure diverse retriever: fetch 60, cap 3 per artist, return top 20
228
+ retriever = DiverseRetriever(
229
+ vector_store=self.vector_store,
230
+ fetch_k=60,
231
+ max_per_artist=3,
232
+ final_k=20,
233
  )
234
 
235
  # Create document prompt
 
239
  )
240
 
241
  # System prompt template
242
+ system_template = """You are a professional songwriter. Your ONLY output is lyrics with section markers. No analysis. No explanation. No commentary. No source references. Nothing before the lyrics. Nothing after the lyrics.
243
+
244
+ OUTPUT FORMAT:
245
+ [Section Name]
246
+ lyrics here
247
+
248
+ [Next Section]
249
+ lyrics here
250
+
251
+ That is it. Section markers in brackets, lyrics below each one. Nothing else.
252
+
253
+ BANNED WORDS never use any of these:
254
+ neon, algorithm, digital, phantom, pixel, shadow, reflection, concrete jungle, echo chamber, midnight, cypher, whisper, canvas, tapestry, labyrinth, mosaic, symphony, aurora, ethereal, cosmic, celestial, visceral, transcend, paradigm, ultrapixel, emotional phantom
255
+
256
+ SPECIFICITY RULES every line must follow these:
257
+ 1. SCENES over concepts — put the listener in a specific place with objects they can see
258
+ 2. OBJECTS over adjectives name the actual thing (a dented Ford Ranger, not "a broken vehicle")
259
+ 3. CONSEQUENCES over metaphors — show what happened, not what it was like
260
+ 4. TEMPORAL SPECIFICS — Tuesday, 3 AM, late August, second semester, not "one day" or "sometimes"
261
+ 5. DOMESTIC DETAIL — kitchen tables, screen doors, parking lots, unwashed mugs, not abstract spaces
262
+ 6. GUT-PUNCH MOMENTS one line per section that lands like a physical sensation
263
+ 7. EMOTIONAL SHIFTS — each section should feel different from the last (angry→tender, numb→raw)
264
+
265
+ BAD (abstract LLM output):
266
+ "In the shadows of my mind I wander through the echoes
267
+ Searching for a light that fades like whispers in the wind
268
+ The tapestry of memories unravels at the seams
269
+ As I transcend the boundaries of what we could have been"
270
+
271
+ GOOD (specific, lived-detail writing):
272
+ "Your coffee mug's still on the counter, Wednesday morning light
273
+ I keep stepping over boxes I packed three weeks ago
274
+ The landlord needs an answer and my sister needs a ride
275
+ But I'm just sitting on the kitchen floor in yesterday's clothes"
276
+
277
+ The GOOD example works because: specific mug, specific day, specific floor, specific detail about boxes with a time frame, real obligations pulling at the narrator. Every line is a scene you can photograph.
278
+
279
+ Previous Chat History:
280
+ {chat_history}
281
+
282
+ Reference lyrics — study their rhythm, rhyme schemes, flow, tone, and the kinds of details they use. Draw inspiration from their emotional register and imagery approach, but write original lines. Do not copy phrases directly:
283
+ {context}
284
+
285
+ User Request: {question}"""
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
286
 
287
  prompt = PromptTemplate(
288
  input_variables=["context", "chat_history", "question"],
 
291
 
292
  # Initialize language model
293
  llm = ChatOpenAI(
294
+ temperature=0.95,
295
+ model_name=Settings.LLM_MODEL,
296
+ max_tokens=2000,
297
+ top_p=0.9,
298
+ presence_penalty=0.25,
299
+ frequency_penalty=0.2
300
  )
301
 
302
  # Create QA chain