Upload folder using huggingface_hub
Browse files
- app.py +15 -19
- src/generator/generator.py +14 -23
app.py
CHANGED
|
@@ -33,6 +33,10 @@ def main():
|
|
| 33 |
|
| 34 |
st.title("SongLift LyrGen2")
|
| 35 |
st.sidebar.markdown(f"**Model:** {Settings.LLM_MODEL}")
|
|
|
|
|
|
|
|
|
|
|
|
|
| 36 |
|
| 37 |
# Only run startup once per session
|
| 38 |
if 'initialized' not in st.session_state:
|
|
@@ -91,36 +95,28 @@ def main():
|
|
| 91 |
|
| 92 |
# Display sources with content
|
| 93 |
with st.expander("View Sources and Context"):
|
| 94 |
-
|
|
|
|
| 95 |
for detail in response["context_details"]:
|
| 96 |
st.write(
|
| 97 |
-
f"\n**{detail['artist']} - {detail['song']}**
|
| 98 |
-
f"(Similarity: {detail['similarity']}%)"
|
| 99 |
)
|
| 100 |
-
st.write("Content snippet:")
|
| 101 |
st.text(detail['content'])
|
| 102 |
st.write("---")
|
| 103 |
|
| 104 |
-
|
|
|
|
| 105 |
seen_sources = set()
|
| 106 |
-
|
| 107 |
-
|
| 108 |
-
for doc, score in response["source_documents_with_scores"]:
|
| 109 |
source_key = (
|
| 110 |
-
doc.metadata['artist'],
|
| 111 |
-
doc.metadata['song_title']
|
| 112 |
)
|
| 113 |
if source_key not in seen_sources:
|
| 114 |
seen_sources.add(source_key)
|
| 115 |
-
|
| 116 |
-
|
| 117 |
-
for doc, score in unique_sources:
|
| 118 |
-
similarity = round((1 - score) * 100, 2)
|
| 119 |
-
st.write(
|
| 120 |
-
f"- {doc.metadata['artist']} - "
|
| 121 |
-
f"{doc.metadata['song_title']} "
|
| 122 |
-
f"(Similarity: {similarity}%)"
|
| 123 |
-
)
|
| 124 |
|
| 125 |
# Update chat history
|
| 126 |
st.session_state.chat_history.append((user_input, lyrics))
|
|
|
|
| 33 |
|
| 34 |
st.title("SongLift LyrGen2")
|
| 35 |
st.sidebar.markdown(f"**Model:** {Settings.LLM_MODEL}")
|
| 36 |
+
if st.sidebar.button("New Song"):
|
| 37 |
+
st.session_state.chat_history = []
|
| 38 |
+
st.session_state.current_lyrics = None
|
| 39 |
+
st.rerun()
|
| 40 |
|
| 41 |
# Only run startup once per session
|
| 42 |
if 'initialized' not in st.session_state:
|
|
|
|
| 95 |
|
| 96 |
# Display sources with content
|
| 97 |
with st.expander("View Sources and Context"):
|
| 98 |
+
# Show top retrieved contexts with snippets
|
| 99 |
+
st.write("### Retrieved Contexts")
|
| 100 |
for detail in response["context_details"]:
|
| 101 |
st.write(
|
| 102 |
+
f"\n**{detail['artist']} - {detail['song']}**"
|
|
|
|
| 103 |
)
|
|
|
|
| 104 |
st.text(detail['content'])
|
| 105 |
st.write("---")
|
| 106 |
|
| 107 |
+
# Show all unique source songs from the chain
|
| 108 |
+
st.write("### All Sources Used")
|
| 109 |
seen_sources = set()
|
| 110 |
+
source_docs = response.get("source_documents", [])
|
| 111 |
+
for doc in source_docs:
|
|
|
|
| 112 |
source_key = (
|
| 113 |
+
doc.metadata.get('artist', 'Unknown'),
|
| 114 |
+
doc.metadata.get('song_title', 'Unknown')
|
| 115 |
)
|
| 116 |
if source_key not in seen_sources:
|
| 117 |
seen_sources.add(source_key)
|
| 118 |
+
st.write(f"- {source_key[0]} - {source_key[1]}")
|
| 119 |
+
st.write(f"\n*{len(seen_sources)} unique songs from {len({s[0] for s in seen_sources})} artists*")
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 120 |
|
| 121 |
# Update chat history
|
| 122 |
st.session_state.chat_history.append((user_input, lyrics))
|
src/generator/generator.py
CHANGED
|
@@ -359,29 +359,9 @@ User Request: {question}"""
|
|
| 359 |
print("Starting lyrics generation process...")
|
| 360 |
print(f"Using OpenAI model: {Settings.LLM_MODEL}")
|
| 361 |
|
| 362 |
-
# Get source documents
|
| 363 |
-
print("Searching for similar documents...")
|
| 364 |
-
try:
|
| 365 |
-
results = self._similarity_search_with_retry(prompt)
|
| 366 |
-
# results is a list of (Document, score) tuples
|
| 367 |
-
docs_and_scores = [(doc[0], doc[1]) for doc in results] # Unpack tuples correctly
|
| 368 |
-
except Exception as e:
|
| 369 |
-
print(f"Error during similarity search: {str(e)}")
|
| 370 |
-
raise RuntimeError(f"Failed to search vector store: {str(e)}")
|
| 371 |
-
|
| 372 |
-
# Create detailed context log
|
| 373 |
-
context_details = []
|
| 374 |
-
for doc, score in docs_and_scores[:5]: # Log top 5 for brevity
|
| 375 |
-
context_details.append({
|
| 376 |
-
'artist': doc.metadata['artist'],
|
| 377 |
-
'song': doc.metadata['song_title'],
|
| 378 |
-
'similarity': f"{score:.2f}",
|
| 379 |
-
'content': doc.page_content[:200] + "..."
|
| 380 |
-
})
|
| 381 |
-
|
| 382 |
try:
|
| 383 |
print("Attempting OpenAI API call...")
|
| 384 |
-
# Generate response using invoke
|
| 385 |
response = self.qa_chain.invoke({
|
| 386 |
"question": prompt,
|
| 387 |
"chat_history": chat_history
|
|
@@ -407,8 +387,19 @@ User Request: {question}"""
|
|
| 407 |
else:
|
| 408 |
raise RuntimeError(f"OpenAI API error: {error_msg}")
|
| 409 |
|
| 410 |
-
#
|
| 411 |
-
response
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 412 |
response["context_details"] = context_details
|
| 413 |
|
| 414 |
return response
|
|
|
|
| 359 |
print("Starting lyrics generation process...")
|
| 360 |
print(f"Using OpenAI model: {Settings.LLM_MODEL}")
|
| 361 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 362 |
try:
|
| 363 |
print("Attempting OpenAI API call...")
|
| 364 |
+
# Generate response using invoke — DiverseRetriever handles retrieval
|
| 365 |
response = self.qa_chain.invoke({
|
| 366 |
"question": prompt,
|
| 367 |
"chat_history": chat_history
|
|
|
|
| 387 |
else:
|
| 388 |
raise RuntimeError(f"OpenAI API error: {error_msg}")
|
| 389 |
|
| 390 |
+
# Build context details from the chain's actual source documents
|
| 391 |
+
source_docs = response.get("source_documents", [])
|
| 392 |
+
context_details = []
|
| 393 |
+
for doc in source_docs[:10]:
|
| 394 |
+
context_details.append({
|
| 395 |
+
'artist': doc.metadata.get('artist', 'Unknown'),
|
| 396 |
+
'song': doc.metadata.get('song_title', 'Unknown'),
|
| 397 |
+
'content': doc.page_content[:200] + "..."
|
| 398 |
+
})
|
| 399 |
+
|
| 400 |
+
unique_artists = len({d['artist'] for d in context_details})
|
| 401 |
+
print(f"Sources shown: {len(context_details)} chunks from {unique_artists} artists")
|
| 402 |
+
|
| 403 |
response["context_details"] = context_details
|
| 404 |
|
| 405 |
return response
|