James-Edmunds committed on
Commit
a998f2d
·
verified ·
1 Parent(s): 60bea6d

Upload folder using huggingface_hub

Browse files
Files changed (2) hide show
  1. app.py +15 -19
  2. src/generator/generator.py +14 -23
app.py CHANGED
@@ -33,6 +33,10 @@ def main():
33
 
34
  st.title("SongLift LyrGen2")
35
  st.sidebar.markdown(f"**Model:** {Settings.LLM_MODEL}")
 
 
 
 
36
 
37
  # Only run startup once per session
38
  if 'initialized' not in st.session_state:
@@ -91,36 +95,28 @@ def main():
91
 
92
  # Display sources with content
93
  with st.expander("View Sources and Context"):
94
- st.write("### Top Retrieved Contexts")
 
95
  for detail in response["context_details"]:
96
  st.write(
97
- f"\n**{detail['artist']} - {detail['song']}** "
98
- f"(Similarity: {detail['similarity']}%)"
99
  )
100
- st.write("Content snippet:")
101
  st.text(detail['content'])
102
  st.write("---")
103
 
104
- st.write("\n### All Similar Sources")
 
105
  seen_sources = set()
106
- unique_sources = []
107
-
108
- for doc, score in response["source_documents_with_scores"]:
109
  source_key = (
110
- doc.metadata['artist'],
111
- doc.metadata['song_title']
112
  )
113
  if source_key not in seen_sources:
114
  seen_sources.add(source_key)
115
- unique_sources.append((doc, score))
116
-
117
- for doc, score in unique_sources:
118
- similarity = round((1 - score) * 100, 2)
119
- st.write(
120
- f"- {doc.metadata['artist']} - "
121
- f"{doc.metadata['song_title']} "
122
- f"(Similarity: {similarity}%)"
123
- )
124
 
125
  # Update chat history
126
  st.session_state.chat_history.append((user_input, lyrics))
 
33
 
34
  st.title("SongLift LyrGen2")
35
  st.sidebar.markdown(f"**Model:** {Settings.LLM_MODEL}")
36
+ if st.sidebar.button("New Song"):
37
+ st.session_state.chat_history = []
38
+ st.session_state.current_lyrics = None
39
+ st.rerun()
40
 
41
  # Only run startup once per session
42
  if 'initialized' not in st.session_state:
 
95
 
96
  # Display sources with content
97
  with st.expander("View Sources and Context"):
98
+ # Show top retrieved contexts with snippets
99
+ st.write("### Retrieved Contexts")
100
  for detail in response["context_details"]:
101
  st.write(
102
+ f"\n**{detail['artist']} - {detail['song']}**"
 
103
  )
 
104
  st.text(detail['content'])
105
  st.write("---")
106
 
107
+ # Show all unique source songs from the chain
108
+ st.write("### All Sources Used")
109
  seen_sources = set()
110
+ source_docs = response.get("source_documents", [])
111
+ for doc in source_docs:
 
112
  source_key = (
113
+ doc.metadata.get('artist', 'Unknown'),
114
+ doc.metadata.get('song_title', 'Unknown')
115
  )
116
  if source_key not in seen_sources:
117
  seen_sources.add(source_key)
118
+ st.write(f"- {source_key[0]} - {source_key[1]}")
119
+ st.write(f"\n*{len(seen_sources)} unique songs from {len({s[0] for s in seen_sources})} artists*")
 
 
 
 
 
 
 
120
 
121
  # Update chat history
122
  st.session_state.chat_history.append((user_input, lyrics))
src/generator/generator.py CHANGED
@@ -359,29 +359,9 @@ User Request: {question}"""
359
  print("Starting lyrics generation process...")
360
  print(f"Using OpenAI model: {Settings.LLM_MODEL}")
361
 
362
- # Get source documents
363
- print("Searching for similar documents...")
364
- try:
365
- results = self._similarity_search_with_retry(prompt)
366
- # results is a list of (Document, score) tuples
367
- docs_and_scores = [(doc[0], doc[1]) for doc in results] # Unpack tuples correctly
368
- except Exception as e:
369
- print(f"Error during similarity search: {str(e)}")
370
- raise RuntimeError(f"Failed to search vector store: {str(e)}")
371
-
372
- # Create detailed context log
373
- context_details = []
374
- for doc, score in docs_and_scores[:5]: # Log top 5 for brevity
375
- context_details.append({
376
- 'artist': doc.metadata['artist'],
377
- 'song': doc.metadata['song_title'],
378
- 'similarity': f"{score:.2f}",
379
- 'content': doc.page_content[:200] + "..."
380
- })
381
-
382
  try:
383
  print("Attempting OpenAI API call...")
384
- # Generate response using invoke
385
  response = self.qa_chain.invoke({
386
  "question": prompt,
387
  "chat_history": chat_history
@@ -407,8 +387,19 @@ User Request: {question}"""
407
  else:
408
  raise RuntimeError(f"OpenAI API error: {error_msg}")
409
 
410
- # Add detailed context to response
411
- response["source_documents_with_scores"] = docs_and_scores
 
 
 
 
 
 
 
 
 
 
 
412
  response["context_details"] = context_details
413
 
414
  return response
 
359
  print("Starting lyrics generation process...")
360
  print(f"Using OpenAI model: {Settings.LLM_MODEL}")
361
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
362
  try:
363
  print("Attempting OpenAI API call...")
364
+ # Generate response using invoke — DiverseRetriever handles retrieval
365
  response = self.qa_chain.invoke({
366
  "question": prompt,
367
  "chat_history": chat_history
 
387
  else:
388
  raise RuntimeError(f"OpenAI API error: {error_msg}")
389
 
390
+ # Build context details from the chain's actual source documents
391
+ source_docs = response.get("source_documents", [])
392
+ context_details = []
393
+ for doc in source_docs[:10]:
394
+ context_details.append({
395
+ 'artist': doc.metadata.get('artist', 'Unknown'),
396
+ 'song': doc.metadata.get('song_title', 'Unknown'),
397
+ 'content': doc.page_content[:200] + "..."
398
+ })
399
+
400
+ unique_artists = len({d['artist'] for d in context_details})
401
+ print(f"Sources shown: {len(context_details)} chunks from {unique_artists} artists")
402
+
403
  response["context_details"] = context_details
404
 
405
  return response