Spaces:

SongLift
/

LyrGen2

Sleeping

App Files Files Community

James-Edmunds committed on Feb 18

Commit

4c377bd

verified ·

1 Parent(s): d305e69

Upload folder using huggingface_hub

Browse files

Files changed (3) hide show

app.py +12 -2
config/settings.py +1 -1
src/generator/generator.py +91 -79

app.py CHANGED Viewed

@@ -10,10 +10,19 @@ if os.getenv('DEPLOYMENT_MODE') == 'huggingface':
 else:
     os.environ['DYLD_LIBRARY_PATH'] = '/usr/local/opt/sqlite/lib'
 from src.generator.generator import LyricGenerator
 from config.settings import Settings
 def main():
     """Main application function"""
     st.set_page_config(
@@ -23,6 +32,7 @@ def main():
     )
     st.title("SongLift LyrGen2")
     # Only run startup once per session
     if 'initialized' not in st.session_state:
@@ -57,7 +67,7 @@ def main():
         with st.chat_message("user"):
             st.write(user_msg)
         with st.chat_message("assistant"):
-            st.markdown(f"```\n{assistant_msg}\n```")
     # Chat interface
     user_input = st.chat_input("Enter your prompt (ask for new lyrics or modify existing ones)...")
@@ -76,7 +86,7 @@ def main():
                 # Store the response
                 lyrics = response['answer']
-                st.markdown(f"```\n{lyrics}\n```")
                 st.session_state.current_lyrics = lyrics
                 # Display sources with content

 else:
     os.environ['DYLD_LIBRARY_PATH'] = '/usr/local/opt/sqlite/lib'
+import re
 from src.generator.generator import LyricGenerator
 from config.settings import Settings
def format_lyrics(text: str) -> str:
    """Format generated lyrics as Markdown for display.

    Section markers such as ``[Verse 1]`` or ``[Chorus]`` are wrapped in bold,
    surrounding whitespace is trimmed, and line breaks inside a section are
    turned into Markdown hard breaks so each lyric line renders on its own
    line instead of being folded into a single paragraph.

    Args:
        text: Raw lyrics text. ``None`` or an empty string is tolerated.

    Returns:
        The Markdown-formatted lyrics, or ``""`` when there is nothing to show.
    """
    if not text:
        return ""

    # Normalise line endings so the per-line handling below sees only "\n".
    text = text.replace("\r\n", "\n").replace("\r", "\n")

    # Bold section markers like [Verse 1], [Chorus], etc. The lookarounds skip
    # markers that are already wrapped in ** so they are not double-wrapped.
    text = re.sub(r'(?<!\*\*)\[([^\]\n]+)\](?!\*\*)', r'**[\1]**', text)

    lines = [line.rstrip() for line in text.strip().split("\n")]

    # Markdown treats a single newline as a soft break (rendered as a space).
    # Two trailing spaces force a hard break; they are only needed between two
    # consecutive non-empty lines, since a blank line already ends a paragraph.
    formatted = []
    for index, line in enumerate(lines):
        following = lines[index + 1] if index + 1 < len(lines) else ""
        formatted.append(line + "  " if line and following else line)
    return "\n".join(formatted)
 def main():
     """Main application function"""
     st.set_page_config(
     )
     st.title("SongLift LyrGen2")
+    st.sidebar.markdown(f"**Model:** {Settings.LLM_MODEL}")
     # Only run startup once per session
     if 'initialized' not in st.session_state:
         with st.chat_message("user"):
             st.write(user_msg)
         with st.chat_message("assistant"):
+            st.markdown(format_lyrics(assistant_msg))
     # Chat interface
     user_input = st.chat_input("Enter your prompt (ask for new lyrics or modify existing ones)...")
                 # Store the response
                 lyrics = response['answer']
+                st.markdown(format_lyrics(lyrics))
                 st.session_state.current_lyrics = lyrics
                 # Display sources with content

config/settings.py CHANGED Viewed

@@ -26,7 +26,7 @@ class Settings:
     # Model Settings
     EMBEDDING_MODEL = "text-embedding-ada-002"
-    LLM_MODEL = "gpt-4"
     # ChromaDB Settings
     CHROMA_COLLECTION_NAME = "lyrics_v1"

     # Model Settings
     EMBEDDING_MODEL = "text-embedding-ada-002"
+    LLM_MODEL = os.getenv("LLM_MODEL", "gpt-5.2")
     # ChromaDB Settings
     CHROMA_COLLECTION_NAME = "lyrics_v1"

src/generator/generator.py CHANGED Viewed

@@ -1,10 +1,14 @@
 from typing import Dict, List, Optional
 from pathlib import Path
 import shutil
 from langchain_openai import OpenAIEmbeddings, ChatOpenAI
 from langchain_chroma import Chroma
 from langchain.chains import ConversationalRetrievalChain
 from langchain.prompts import PromptTemplate
 from huggingface_hub import snapshot_download, hf_hub_download, HfApi
 from config.settings import Settings
 from tenacity import retry, stop_after_attempt, wait_exponential, retry_if_exception_type
@@ -13,6 +17,37 @@ import sqlite3
 from openai import APIConnectionError, RateLimitError
 class LyricGenerator:
     def __init__(self):
         """Initialize the generator with embeddings"""
@@ -189,9 +224,12 @@ class LyricGenerator:
     def _setup_qa_chain(self) -> None:
         """Initialize the QA chain for generating lyrics"""
-        # Configure retriever with a more reasonable number of documents
-        retriever = self.vector_store.as_retriever(
-            search_kwargs={"k": 20}  # Reduced from 100 to 20
         )
         # Create document prompt
@@ -201,76 +239,50 @@ class LyricGenerator:
         )
         # System prompt template
-        system_template = """You are an expert lyricist who excels at analyzing and
-        adapting patterns from existing lyrics while maintaining professional
-        songwriting standards.
-        CONTEXT UTILIZATION:
-        1. Analysis of Retrieved Examples:
-           - Study rhyme patterns, flow structures, and wordplay
-           - Identify unique stylistic elements matching the requested genre
-           - Extract metaphors and imagery that fit the context
-           - Note how similar themes are handled in different styles
-        2. Technical Song Construction:
-           - Mathematical precision in section lengths (8, 16, 32 bars)
-           - Strategic placement of hooks and payoff lines
-           - Clear sectional contrasts (verse/pre-chorus/chorus dynamics)
-           - Melodic phrasing considerations
-           - Build tension and release patterns
-        3. Genre-Specific Excellence:
-           Pop Structure:
-           - Compact, focused verses (8-16 bars)
-           - Pre-chorus build-up (4-8 bars)
-           - Strong chorus payoff (8 bars)
-           - Bridge contrasts (8 bars)
-           - Strategic repetition
-           - Emphasis on memorable, repeatable phrases
-           Hip-Hop Elements:
-           - Complex rhyme schemes (multisyllabic, internal)
-           - Advanced wordplay and metaphors
-           - Flow patterns matching contemporary cadences
-           - Authentic voice and modern language
-           - Hooks that balance complexity with accessibility
-        GENERATION APPROACH:
-        1. First, analyze retrieved lyrics for:
-           - Successful patterns and techniques
-           - Thematic handling
-           - Style-specific elements
-           - Unique devices worth adapting
-        2. Then craft content that:
-           - Adapts identified patterns thoughtfully
-           - Maintains professional structure
-           - Blends inspiration with innovation
-           - Stays current and authentic
-           - Avoids dated or overdone tropes
-        3. Always ensure:
-           - Technical excellence in rhythm and flow
-           - Proper section marking [Verse/Chorus/etc]
-           - Emotional resonance and authenticity
-           - Modern, fresh perspective
-           - Strategic use of space and silence
-        Previous Chat History:
-        {chat_history}
-        Retrieved Examples for Analysis:
-        {context}
-        Response Format:
-        1. Brief analysis of relevant patterns from retrieved examples
-        2. Generated lyrics with clear section marking
-        3. Explanation of how you adapted specific elements from the sources
-        User Request: {question}
-        Response: Let me analyze the retrieved lyrics and craft something that
-        combines their strongest elements with professional songwriting principles."""
         prompt = PromptTemplate(
             input_variables=["context", "chat_history", "question"],
@@ -279,12 +291,12 @@ class LyricGenerator:
         # Initialize language model
         llm = ChatOpenAI(
-            temperature=0.9,
-            model_name="gpt-4",
-            max_tokens=1000,
-            top_p=0.95,
-            presence_penalty=0.0,
-            frequency_penalty=0.1
         )
         # Create QA chain

 from typing import Dict, List, Optional
 from pathlib import Path
+from collections import defaultdict
 import shutil
 from langchain_openai import OpenAIEmbeddings, ChatOpenAI
 from langchain_chroma import Chroma
 from langchain.chains import ConversationalRetrievalChain
 from langchain.prompts import PromptTemplate
+from langchain_core.callbacks import CallbackManagerForRetrieverRun
+from langchain_core.documents import Document
+from langchain_core.retrievers import BaseRetriever
 from huggingface_hub import snapshot_download, hf_hub_download, HfApi
 from config.settings import Settings
 from tenacity import retry, stop_after_attempt, wait_exponential, retry_if_exception_type
 from openai import APIConnectionError, RateLimitError
class DiverseRetriever(BaseRetriever):
    """Retriever that caps per-artist chunks to ensure diverse sources.

    Requests ``fetch_k`` candidates from the vector store, then walks them in
    the order returned, keeping at most ``max_per_artist`` chunks per
    ``artist`` metadata value until ``final_k`` chunks have been collected.
    Chunks without an ``artist`` key are grouped under ``"unknown"``.
    """

    # Vector store queried through ``similarity_search_with_score``.
    vector_store: Chroma
    # Number of candidates requested from the vector store before filtering.
    fetch_k: int = 60
    # Maximum number of chunks kept for any single artist.
    max_per_artist: int = 3
    # Maximum number of chunks returned to the caller.
    final_k: int = 20

    class Config:
        arbitrary_types_allowed = True

    def _get_relevant_documents(
        self, query: str, *, run_manager: CallbackManagerForRetrieverRun
    ) -> List[Document]:
        """Return up to ``final_k`` documents, limited per artist.

        Args:
            query: Text used for the similarity search.
            run_manager: Callback manager supplied by the retriever framework;
                not used here.

        Returns:
            The selected documents, in the order the vector store returned
            them. Empty when ``final_k`` or ``max_per_artist`` is not positive.
        """
        # Nothing can be selected with a non-positive limit, so skip the
        # vector-store query entirely.
        if self.final_k <= 0 or self.max_per_artist <= 0:
            return []

        results = self.vector_store.similarity_search_with_score(
            query, k=self.fetch_k
        )
        artist_counts: dict = defaultdict(int)
        selected: List[Document] = []
        for doc, _score in results:
            # Check the cap before appending so the result never exceeds
            # final_k.
            if len(selected) >= self.final_k:
                break
            artist = doc.metadata.get("artist", "unknown")
            if artist_counts[artist] >= self.max_per_artist:
                continue
            artist_counts[artist] += 1
            selected.append(doc)
        unique_artists = len(artist_counts)
        print(f"DiverseRetriever: {len(selected)} chunks from {unique_artists} artists")
        return selected
 class LyricGenerator:
     def __init__(self):
         """Initialize the generator with embeddings"""
     def _setup_qa_chain(self) -> None:
         """Initialize the QA chain for generating lyrics"""
+        # Configure diverse retriever: fetch 60, cap 3 per artist, return top 20
+        retriever = DiverseRetriever(
+            vector_store=self.vector_store,
+            fetch_k=60,
+            max_per_artist=3,
+            final_k=20,
         )
         # Create document prompt
         )
         # System prompt template
+        system_template = """You are a professional songwriter. Your ONLY output is lyrics with section markers. No analysis. No explanation. No commentary. No source references. Nothing before the lyrics. Nothing after the lyrics.
+OUTPUT FORMAT:
+[Section Name]
+lyrics here
+[Next Section]
+lyrics here
+That is it. Section markers in brackets, lyrics below each one. Nothing else.
+BANNED WORDS — never use any of these:
+neon, algorithm, digital, phantom, pixel, shadow, reflection, concrete jungle, echo chamber, midnight, cypher, whisper, canvas, tapestry, labyrinth, mosaic, symphony, aurora, ethereal, cosmic, celestial, visceral, transcend, paradigm, ultrapixel, emotional phantom
+SPECIFICITY RULES — every line must follow these:
+1. SCENES over concepts — put the listener in a specific place with objects they can see
+2. OBJECTS over adjectives — name the actual thing (a dented Ford Ranger, not "a broken vehicle")
+3. CONSEQUENCES over metaphors — show what happened, not what it was like
+4. TEMPORAL SPECIFICS — Tuesday, 3 AM, late August, second semester, not "one day" or "sometimes"
+5. DOMESTIC DETAIL — kitchen tables, screen doors, parking lots, unwashed mugs, not abstract spaces
+6. GUT-PUNCH MOMENTS — one line per section that lands like a physical sensation
+7. EMOTIONAL SHIFTS — each section should feel different from the last (angry→tender, numb→raw)
+BAD (abstract LLM output):
+"In the shadows of my mind I wander through the echoes
+Searching for a light that fades like whispers in the wind
+The tapestry of memories unravels at the seams
+As I transcend the boundaries of what we could have been"
+GOOD (specific, lived-detail writing):
+"Your coffee mug's still on the counter, Wednesday morning light
+I keep stepping over boxes I packed three weeks ago
+The landlord needs an answer and my sister needs a ride
+But I'm just sitting on the kitchen floor in yesterday's clothes"
+The GOOD example works because: specific mug, specific day, specific floor, specific detail about boxes with a time frame, real obligations pulling at the narrator. Every line is a scene you can photograph.
+Previous Chat History:
+{chat_history}
+Reference lyrics — study their rhythm, rhyme schemes, flow, tone, and the kinds of details they use. Draw inspiration from their emotional register and imagery approach, but write original lines. Do not copy phrases directly:
+{context}
+User Request: {question}"""
         prompt = PromptTemplate(
             input_variables=["context", "chat_history", "question"],
         # Initialize language model
         llm = ChatOpenAI(
+            temperature=0.95,
+            model_name=Settings.LLM_MODEL,
+            max_tokens=2000,
+            top_p=0.9,
+            presence_penalty=0.25,
+            frequency_penalty=0.2
         )
         # Create QA chain