James Edmunds committed on
Commit
ac8d6e6
·
1 Parent(s): 4f8d0ad

another path fix for HF

Browse files
Files changed (3) hide show
  1. app.py +24 -26
  2. scripts/test_environment.py +31 -7
  3. src/generator/generator.py +18 -13
app.py CHANGED
@@ -72,25 +72,32 @@ def main():
72
  )
73
 
74
  st.title("SongLift LyrGen2 - AI Lyrics Generator")
75
- print("===== Application Startup at", datetime.now().strftime("%Y-%m-%d %H:%M:%S"), "=====\n")
76
 
77
- # Run environment tests if in HuggingFace
78
- if Settings.is_huggingface():
79
- print("\n=== Running Environment Tests ===")
80
- tests_passed = test_environment()
81
- if not tests_passed:
82
- st.error("⚠️ Environment tests failed. Check the logs for details.")
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
83
  return
84
- print("✓ Environment tests passed")
85
-
86
- # Initialize generator
87
- try:
88
- print("\n=== Initializing Generator ===")
89
- generator = LyricGenerator()
90
- except Exception as e:
91
- st.error(f"Error initializing generator: {str(e)}")
92
- print(f"Error: {str(e)}")
93
- return
94
 
95
  # Check OpenAI API key at startup
96
  if Settings.is_huggingface():
@@ -112,15 +119,6 @@ def main():
112
 
113
  print(f"Current deployment mode: {Settings.DEPLOYMENT_MODE}")
114
 
115
- # Initialize generator on first run
116
- if 'generator' not in st.session_state:
117
- generator = initialize_generator()
118
- if generator is None:
119
- st.stop()
120
- st.session_state.generator = generator
121
- st.session_state.chat_history = []
122
- st.session_state.current_lyrics = None
123
-
124
  # Display chat history
125
  for message in st.session_state.chat_history:
126
  user_msg, assistant_msg = message
 
72
  )
73
 
74
  st.title("SongLift LyrGen2 - AI Lyrics Generator")
 
75
 
76
+ # Only run startup once per session
77
+ if 'initialized' not in st.session_state:
78
+ print("===== Application Startup at", datetime.now().strftime("%Y-%m-%d %H:%M:%S"), "=====\n")
79
+
80
+ # Run environment tests if in HuggingFace
81
+ if Settings.is_huggingface():
82
+ print("\n=== Running Environment Tests ===")
83
+ tests_passed = test_environment()
84
+ if not tests_passed:
85
+ st.error("⚠️ Environment tests failed. Check the logs for details.")
86
+ return
87
+ print("✓ Environment tests passed")
88
+
89
+ # Initialize generator
90
+ try:
91
+ print("\n=== Initializing Generator ===")
92
+ generator = LyricGenerator()
93
+ st.session_state.generator = generator
94
+ st.session_state.chat_history = []
95
+ st.session_state.current_lyrics = None
96
+ st.session_state.initialized = True
97
+ except Exception as e:
98
+ st.error(f"Error initializing generator: {str(e)}")
99
+ print(f"Error: {str(e)}")
100
  return
 
 
 
 
 
 
 
 
 
 
101
 
102
  # Check OpenAI API key at startup
103
  if Settings.is_huggingface():
 
119
 
120
  print(f"Current deployment mode: {Settings.DEPLOYMENT_MODE}")
121
 
 
 
 
 
 
 
 
 
 
122
  # Display chat history
123
  for message in st.session_state.chat_history:
124
  user_msg, assistant_msg = message
scripts/test_environment.py CHANGED
@@ -1,5 +1,6 @@
1
  import sys
2
  from pathlib import Path
 
3
 
4
  # Add project root to path
5
  project_root = Path(__file__).parent.parent
@@ -19,26 +20,49 @@ def test_openai_connection():
19
  if not Settings.OPENAI_API_KEY:
20
  print("❌ OpenAI API key not found!")
21
  return False
22
- print("✓ OpenAI API key found")
23
 
24
- # Test API reachability
25
  try:
26
- response = requests.get("https://api.openai.com")
 
 
 
 
 
27
  print(f"✓ OpenAI API reachable (Status: {response.status_code})")
 
 
 
 
 
 
 
 
 
 
 
28
  except Exception as e:
29
- print(f"❌ Cannot reach OpenAI API: {e}")
30
  return False
31
 
32
- # Test embeddings
33
  try:
 
34
  embeddings = OpenAIEmbeddings(
35
- openai_api_key=Settings.OPENAI_API_KEY
 
 
36
  )
 
37
  result = embeddings.embed_query("test")
38
  print(f"✓ Embeddings working (vector size: {len(result)})")
39
  return True
40
  except Exception as e:
41
- print(f"❌ Embeddings error: {e}")
 
 
 
42
  return False
43
 
44
 
 
1
  import sys
2
  from pathlib import Path
3
+ import os
4
 
5
  # Add project root to path
6
  project_root = Path(__file__).parent.parent
 
20
  if not Settings.OPENAI_API_KEY:
21
  print("❌ OpenAI API key not found!")
22
  return False
23
+ print(f"✓ OpenAI API key found (length: {len(Settings.OPENAI_API_KEY)})")
24
 
25
+ # Test API reachability with more detail
26
  try:
27
+ print("Testing connection to api.openai.com...")
28
+ response = requests.get(
29
+ "https://api.openai.com",
30
+ timeout=10,
31
+ verify=True # Force SSL verification
32
+ )
33
  print(f"✓ OpenAI API reachable (Status: {response.status_code})")
34
+ print(f"Response headers: {dict(response.headers)}")
35
+ except requests.exceptions.SSLError as e:
36
+ print(f"❌ SSL Error connecting to OpenAI: {e}")
37
+ return False
38
+ except requests.exceptions.ConnectionError as e:
39
+ print(f"❌ Connection Error: {e}")
40
+ print("Checking if proxy is needed...")
41
+ # Try to get environment proxy settings
42
+ print(f"HTTP_PROXY: {os.environ.get('HTTP_PROXY')}")
43
+ print(f"HTTPS_PROXY: {os.environ.get('HTTPS_PROXY')}")
44
+ return False
45
  except Exception as e:
46
+ print(f"❌ Cannot reach OpenAI API: {type(e).__name__}: {e}")
47
  return False
48
 
49
+ # Test embeddings with more detail
50
  try:
51
+ print("\nTesting embeddings creation...")
52
  embeddings = OpenAIEmbeddings(
53
+ openai_api_key=Settings.OPENAI_API_KEY,
54
+ timeout=30,
55
+ max_retries=2
56
  )
57
+ print("Embeddings object created, attempting query...")
58
  result = embeddings.embed_query("test")
59
  print(f"✓ Embeddings working (vector size: {len(result)})")
60
  return True
61
  except Exception as e:
62
+ print(f"❌ Embeddings error: {type(e).__name__}: {e}")
63
+ print("Stack trace:")
64
+ import traceback
65
+ traceback.print_exc()
66
  return False
67
 
68
 
src/generator/generator.py CHANGED
@@ -70,17 +70,23 @@ class LyricGenerator:
70
  data_dir = Path("/data")
71
  data_dir.mkdir(exist_ok=True)
72
 
73
- # Create chroma directory
74
- chroma_dir = data_dir / "chroma"
75
- chroma_dir.mkdir(exist_ok=True)
76
-
77
- # Download the entire chroma directory
78
- print("Downloading Chroma files...")
79
  api = HfApi(token=Settings.HF_TOKEN)
 
 
 
 
 
 
80
 
81
- # Download all files from the chroma directory
82
- for file in api.list_repo_files(Settings.HF_DATASET, repo_type="dataset"):
83
- if file.startswith('chroma/'):
 
 
 
 
 
84
  local_path = hf_hub_download(
85
  repo_id=Settings.HF_DATASET,
86
  filename=file,
@@ -90,10 +96,9 @@ class LyricGenerator:
90
  )
91
  print(f"Downloaded: {file}")
92
 
93
- print("All files downloaded successfully")
94
-
95
- # Set the chroma directory
96
- self.chroma_dir = chroma_dir
97
 
98
  except Exception as e:
99
  print(f"\n=== Error in _setup_embeddings_from_hf ===")
 
70
  data_dir = Path("/data")
71
  data_dir.mkdir(exist_ok=True)
72
 
73
+ # Find the most recent snapshot
 
 
 
 
 
74
  api = HfApi(token=Settings.HF_TOKEN)
75
+ files = api.list_repo_files(Settings.HF_DATASET, repo_type="dataset")
76
+
77
+ # Get all snapshot directories
78
+ snapshots = [f for f in files if 'snapshots' in f and 'chroma' in f]
79
+ if not snapshots:
80
+ raise RuntimeError("No snapshots found in dataset")
81
 
82
+ # Use the most recent snapshot
83
+ latest_snapshot = sorted(snapshots)[-1]
84
+ snapshot_dir = data_dir / latest_snapshot
85
+ print(f"Using snapshot directory: {snapshot_dir}")
86
+
87
+ # Download all files from this snapshot
88
+ for file in files:
89
+ if latest_snapshot in file:
90
  local_path = hf_hub_download(
91
  repo_id=Settings.HF_DATASET,
92
  filename=file,
 
96
  )
97
  print(f"Downloaded: {file}")
98
 
99
+ # Set the chroma directory to the snapshot directory
100
+ self.chroma_dir = snapshot_dir
101
+ print(f"Chroma directory set to: {self.chroma_dir}")
 
102
 
103
  except Exception as e:
104
  print(f"\n=== Error in _setup_embeddings_from_hf ===")