James Edmunds committed on
Commit ·
9567ff8
1
Parent(s): df5d609
Chatbot working and well! Updated system prompt, sources and explanations in chat
Browse files
- app.py +42 -5
- src/generator/generator.py +87 -50
app.py
CHANGED
|
@@ -40,9 +40,18 @@ def main():
|
|
| 40 |
st.stop()
|
| 41 |
st.session_state.generator = generator
|
| 42 |
st.session_state.chat_history = []
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 43 |
|
| 44 |
# Chat interface
|
| 45 |
-
user_input = st.chat_input("Enter your prompt...")
|
| 46 |
|
| 47 |
if user_input:
|
| 48 |
with st.chat_message("user"):
|
|
@@ -55,11 +64,38 @@ def main():
|
|
| 55 |
user_input,
|
| 56 |
st.session_state.chat_history
|
| 57 |
)
|
| 58 |
-
st.markdown(f"```\n{response['answer']}\n```")
|
| 59 |
|
| 60 |
-
#
|
| 61 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 62 |
for doc, score in response["source_documents_with_scores"]:
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 63 |
similarity = round((1 - score) * 100, 2)
|
| 64 |
st.write(
|
| 65 |
f"- {doc.metadata['artist']} - "
|
|
@@ -67,8 +103,9 @@ def main():
|
|
| 67 |
f"(Similarity: {similarity}%)"
|
| 68 |
)
|
| 69 |
|
|
|
|
| 70 |
st.session_state.chat_history.append(
|
| 71 |
-
(user_input,
|
| 72 |
)
|
| 73 |
|
| 74 |
except Exception as e:
|
|
|
|
| 40 |
st.stop()
|
| 41 |
st.session_state.generator = generator
|
| 42 |
st.session_state.chat_history = []
|
| 43 |
+
st.session_state.current_lyrics = None
|
| 44 |
+
|
| 45 |
+
# Display chat history
|
| 46 |
+
for message in st.session_state.chat_history:
|
| 47 |
+
user_msg, assistant_msg = message
|
| 48 |
+
with st.chat_message("user"):
|
| 49 |
+
st.write(user_msg)
|
| 50 |
+
with st.chat_message("assistant"):
|
| 51 |
+
st.markdown(f"```\n{assistant_msg}\n```")
|
| 52 |
|
| 53 |
# Chat interface
|
| 54 |
+
user_input = st.chat_input("Enter your prompt (ask for new lyrics or modify existing ones)...")
|
| 55 |
|
| 56 |
if user_input:
|
| 57 |
with st.chat_message("user"):
|
|
|
|
| 64 |
user_input,
|
| 65 |
st.session_state.chat_history
|
| 66 |
)
|
|
|
|
| 67 |
|
| 68 |
+
# Store the response
|
| 69 |
+
lyrics = response['answer']
|
| 70 |
+
st.markdown(f"```\n{lyrics}\n```")
|
| 71 |
+
st.session_state.current_lyrics = lyrics
|
| 72 |
+
|
| 73 |
+
# Display sources with content
|
| 74 |
+
with st.expander("View Sources and Context"):
|
| 75 |
+
st.write("### Top Retrieved Contexts")
|
| 76 |
+
for detail in response["context_details"]:
|
| 77 |
+
st.write(f"\n**{detail['artist']} - {detail['song']}** "
|
| 78 |
+
f"(Similarity: {detail['similarity']}%)")
|
| 79 |
+
st.write("Content snippet:")
|
| 80 |
+
st.text(detail['content'])
|
| 81 |
+
st.write("---")
|
| 82 |
+
|
| 83 |
+
st.write("\n### All Similar Sources")
|
| 84 |
+
# Deduplicate sources by artist and song title
|
| 85 |
+
seen_sources = set()
|
| 86 |
+
unique_sources = []
|
| 87 |
+
|
| 88 |
for doc, score in response["source_documents_with_scores"]:
|
| 89 |
+
source_key = (
|
| 90 |
+
doc.metadata['artist'],
|
| 91 |
+
doc.metadata['song_title']
|
| 92 |
+
)
|
| 93 |
+
if source_key not in seen_sources:
|
| 94 |
+
seen_sources.add(source_key)
|
| 95 |
+
unique_sources.append((doc, score))
|
| 96 |
+
|
| 97 |
+
# Display unique sources
|
| 98 |
+
for doc, score in unique_sources:
|
| 99 |
similarity = round((1 - score) * 100, 2)
|
| 100 |
st.write(
|
| 101 |
f"- {doc.metadata['artist']} - "
|
|
|
|
| 103 |
f"(Similarity: {similarity}%)"
|
| 104 |
)
|
| 105 |
|
| 106 |
+
# Update chat history
|
| 107 |
st.session_state.chat_history.append(
|
| 108 |
+
(user_input, lyrics)
|
| 109 |
)
|
| 110 |
|
| 111 |
except Exception as e:
|
src/generator/generator.py
CHANGED
|
@@ -1,7 +1,7 @@
|
|
| 1 |
from typing import Dict, List, Optional
|
| 2 |
from pathlib import Path
|
| 3 |
from langchain_openai import OpenAIEmbeddings, ChatOpenAI
|
| 4 |
-
from
|
| 5 |
from langchain.chains import ConversationalRetrievalChain
|
| 6 |
from langchain.prompts import PromptTemplate
|
| 7 |
import json
|
|
@@ -48,9 +48,9 @@ class LyricGenerator:
|
|
| 48 |
|
| 49 |
def _setup_qa_chain(self) -> None:
|
| 50 |
"""Initialize the QA chain for generating lyrics"""
|
| 51 |
-
# Configure retriever
|
| 52 |
retriever = self.vector_store.as_retriever(
|
| 53 |
-
search_kwargs={"k":
|
| 54 |
)
|
| 55 |
|
| 56 |
# Create document prompt
|
|
@@ -60,51 +60,76 @@ class LyricGenerator:
|
|
| 60 |
)
|
| 61 |
|
| 62 |
# System prompt template
|
| 63 |
-
system_template = """You are an expert
|
| 64 |
-
|
| 65 |
-
|
| 66 |
-
|
| 67 |
-
You are most importantly cool, and young, and up-to-date with current cultural and musical trends.
|
| 68 |
-
|
| 69 |
-
You will never use any cheesy, old-school, or overdone lyrical tropes.
|
| 70 |
|
| 71 |
-
|
| 72 |
-
1.
|
| 73 |
-
|
| 74 |
-
|
| 75 |
-
|
| 76 |
-
|
| 77 |
-
|
| 78 |
-
|
| 79 |
-
|
| 80 |
-
|
| 81 |
-
|
| 82 |
-
|
| 83 |
-
|
| 84 |
-
|
| 85 |
-
|
| 86 |
-
|
| 87 |
-
|
| 88 |
-
|
| 89 |
-
|
| 90 |
-
|
| 91 |
-
|
| 92 |
-
|
| 93 |
-
|
| 94 |
-
|
| 95 |
-
|
| 96 |
-
|
| 97 |
-
|
| 98 |
-
|
| 99 |
-
|
| 100 |
-
|
| 101 |
-
|
| 102 |
-
|
| 103 |
-
|
| 104 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 105 |
User Request: {question}
|
| 106 |
|
| 107 |
-
Response: Let me
|
|
|
|
| 108 |
|
| 109 |
prompt = PromptTemplate(
|
| 110 |
input_variables=["context", "chat_history", "question"],
|
|
@@ -152,23 +177,35 @@ class LyricGenerator:
|
|
| 152 |
chat_history = []
|
| 153 |
|
| 154 |
try:
|
| 155 |
-
# Get documents with scores
|
| 156 |
docs_and_scores = self.vector_store.similarity_search_with_score(
|
| 157 |
prompt,
|
| 158 |
-
k=
|
| 159 |
)
|
| 160 |
|
| 161 |
# Sort by similarity (convert distance to similarity)
|
| 162 |
docs_and_scores.sort(key=lambda x: x[1], reverse=False)
|
| 163 |
|
| 164 |
-
#
|
| 165 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 166 |
"question": prompt,
|
| 167 |
"chat_history": chat_history
|
| 168 |
})
|
| 169 |
|
| 170 |
-
# Add
|
| 171 |
response["source_documents_with_scores"] = docs_and_scores
|
|
|
|
| 172 |
|
| 173 |
return response
|
| 174 |
|
|
|
|
| 1 |
from typing import Dict, List, Optional
|
| 2 |
from pathlib import Path
|
| 3 |
from langchain_openai import OpenAIEmbeddings, ChatOpenAI
|
| 4 |
+
from langchain_chroma import Chroma
|
| 5 |
from langchain.chains import ConversationalRetrievalChain
|
| 6 |
from langchain.prompts import PromptTemplate
|
| 7 |
import json
|
|
|
|
| 48 |
|
| 49 |
def _setup_qa_chain(self) -> None:
|
| 50 |
"""Initialize the QA chain for generating lyrics"""
|
| 51 |
+
# Configure retriever with a more reasonable number of documents
|
| 52 |
retriever = self.vector_store.as_retriever(
|
| 53 |
+
search_kwargs={"k": 20} # Reduced from 100 to 20
|
| 54 |
)
|
| 55 |
|
| 56 |
# Create document prompt
|
|
|
|
| 60 |
)
|
| 61 |
|
| 62 |
# System prompt template
|
| 63 |
+
system_template = """You are an expert lyricist who excels at analyzing and
|
| 64 |
+
adapting patterns from existing lyrics while maintaining professional
|
| 65 |
+
songwriting standards.
|
|
|
|
|
|
|
|
|
|
|
|
|
| 66 |
|
| 67 |
+
CONTEXT UTILIZATION:
|
| 68 |
+
1. Analysis of Retrieved Examples:
|
| 69 |
+
- Study rhyme patterns, flow structures, and wordplay
|
| 70 |
+
- Identify unique stylistic elements matching the requested genre
|
| 71 |
+
- Extract metaphors and imagery that fit the context
|
| 72 |
+
- Note how similar themes are handled in different styles
|
| 73 |
+
|
| 74 |
+
2. Technical Song Construction:
|
| 75 |
+
- Mathematical precision in section lengths (8, 16, 32 bars)
|
| 76 |
+
- Strategic placement of hooks and payoff lines
|
| 77 |
+
- Clear sectional contrasts (verse/pre-chorus/chorus dynamics)
|
| 78 |
+
- Melodic phrasing considerations
|
| 79 |
+
- Build tension and release patterns
|
| 80 |
+
|
| 81 |
+
3. Genre-Specific Excellence:
|
| 82 |
+
Pop Structure:
|
| 83 |
+
- Compact, focused verses (8-16 bars)
|
| 84 |
+
- Pre-chorus build-up (4-8 bars)
|
| 85 |
+
- Strong chorus payoff (8 bars)
|
| 86 |
+
- Bridge contrasts (8 bars)
|
| 87 |
+
- Strategic repetition
|
| 88 |
+
- Emphasis on memorable, repeatable phrases
|
| 89 |
+
|
| 90 |
+
Hip-Hop Elements:
|
| 91 |
+
- Complex rhyme schemes (multisyllabic, internal)
|
| 92 |
+
- Advanced wordplay and metaphors
|
| 93 |
+
- Flow patterns matching contemporary cadences
|
| 94 |
+
- Authentic voice and modern language
|
| 95 |
+
- Hooks that balance complexity with accessibility
|
| 96 |
+
|
| 97 |
+
GENERATION APPROACH:
|
| 98 |
+
1. First, analyze retrieved lyrics for:
|
| 99 |
+
- Successful patterns and techniques
|
| 100 |
+
- Thematic handling
|
| 101 |
+
- Style-specific elements
|
| 102 |
+
- Unique devices worth adapting
|
| 103 |
+
|
| 104 |
+
2. Then craft content that:
|
| 105 |
+
- Adapts identified patterns thoughtfully
|
| 106 |
+
- Maintains professional structure
|
| 107 |
+
- Blends inspiration with innovation
|
| 108 |
+
- Stays current and authentic
|
| 109 |
+
- Avoids dated or overdone tropes
|
| 110 |
+
|
| 111 |
+
3. Always ensure:
|
| 112 |
+
- Technical excellence in rhythm and flow
|
| 113 |
+
- Proper section marking [Verse/Chorus/etc]
|
| 114 |
+
- Emotional resonance and authenticity
|
| 115 |
+
- Modern, fresh perspective
|
| 116 |
+
- Strategic use of space and silence
|
| 117 |
+
|
| 118 |
+
Previous Chat History:
|
| 119 |
+
{chat_history}
|
| 120 |
+
|
| 121 |
+
Retrieved Examples for Analysis:
|
| 122 |
+
{context}
|
| 123 |
+
|
| 124 |
+
Response Format:
|
| 125 |
+
1. Brief analysis of relevant patterns from retrieved examples
|
| 126 |
+
2. Generated lyrics with clear section marking
|
| 127 |
+
3. Explanation of how you adapted specific elements from the sources
|
| 128 |
+
|
| 129 |
User Request: {question}
|
| 130 |
|
| 131 |
+
Response: Let me analyze the retrieved lyrics and craft something that
|
| 132 |
+
combines their strongest elements with professional songwriting principles."""
|
| 133 |
|
| 134 |
prompt = PromptTemplate(
|
| 135 |
input_variables=["context", "chat_history", "question"],
|
|
|
|
| 177 |
chat_history = []
|
| 178 |
|
| 179 |
try:
|
| 180 |
+
# Get source documents with scores first
|
| 181 |
docs_and_scores = self.vector_store.similarity_search_with_score(
|
| 182 |
prompt,
|
| 183 |
+
k=20
|
| 184 |
)
|
| 185 |
|
| 186 |
# Sort by similarity (convert distance to similarity)
|
| 187 |
docs_and_scores.sort(key=lambda x: x[1], reverse=False)
|
| 188 |
|
| 189 |
+
# Create detailed context log
|
| 190 |
+
context_details = []
|
| 191 |
+
for doc, score in docs_and_scores[:5]: # Log top 5 for brevity
|
| 192 |
+
similarity = round((1 - score) * 100, 2)
|
| 193 |
+
context_details.append({
|
| 194 |
+
'artist': doc.metadata['artist'],
|
| 195 |
+
'song': doc.metadata['song_title'],
|
| 196 |
+
'similarity': similarity,
|
| 197 |
+
'content': doc.page_content[:200] + "..." # First 200 chars
|
| 198 |
+
})
|
| 199 |
+
|
| 200 |
+
# Generate response using invoke
|
| 201 |
+
response = self.qa_chain.invoke({
|
| 202 |
"question": prompt,
|
| 203 |
"chat_history": chat_history
|
| 204 |
})
|
| 205 |
|
| 206 |
+
# Add detailed context to response
|
| 207 |
response["source_documents_with_scores"] = docs_and_scores
|
| 208 |
+
response["context_details"] = context_details
|
| 209 |
|
| 210 |
return response
|
| 211 |
|