| | import os |
| | import io |
| | import gradio as gr |
| | import torch |
| | import numpy as np |
| | import re |
| | import pronouncing |
| | import functools |
| | from transformers import ( |
| | AutoModelForAudioClassification, |
| | AutoFeatureExtractor, |
| | AutoTokenizer, |
| | pipeline, |
| | AutoModelForCausalLM, |
| | BitsAndBytesConfig |
| | ) |
| | from huggingface_hub import login |
| | from utils import ( |
| | load_audio, |
| | extract_audio_duration, |
| | extract_mfcc_features, |
| | format_genre_results, |
| | ensure_cuda_availability |
| | ) |
| | from emotionanalysis import MusicAnalyzer |
| | import librosa |
| | from beat_analysis import BeatAnalyzer |
| |
|
| | |
# Shared beat analyzer instance; process_audio() and the lyric helpers in this
# file call its methods (detect_time_signature, count_syllables, ...).
beat_analyzer = BeatAnalyzer()

# Log in to the Hugging Face Hub only when a token is provided via the
# HF_TOKEN environment variable.
if "HF_TOKEN" in os.environ:
    login(token=os.environ["HF_TOKEN"])

# Model identifiers and audio settings.
GENRE_MODEL_NAME = "dima806/music_genres_classification"
# NOTE(review): MUSIC_DETECTION_MODEL is not referenced in the visible part of this file.
MUSIC_DETECTION_MODEL = "MIT/ast-finetuned-audioset-10-10-0.4593"
LLM_MODEL_NAME = "Qwen/QwQ-32B"
SAMPLE_RATE = 22050  # passed as ``sr`` to load_audio() in process_audio()

# Used as a truthy flag when choosing device_map for the genre model
# (presumably a bool returned by utils.ensure_cuda_availability -- TODO confirm).
CUDA_AVAILABLE = ensure_cuda_availability()
| |
|
| | |
# Load the genre classifier once at import time. On any failure both globals
# are set to None so that process_audio() can fall back to an "Unknown" genre.
print("Loading genre classification model...")
try:
    genre_feature_extractor = AutoFeatureExtractor.from_pretrained(GENRE_MODEL_NAME)
    genre_model = AutoModelForAudioClassification.from_pretrained(
        GENRE_MODEL_NAME,
        device_map="auto" if CUDA_AVAILABLE else None,
    )
except Exception as e:
    print(f"Error loading genre model: {e}")
    genre_model = None
    genre_feature_extractor = None


def get_genre_model():
    """Return ``(genre_model, genre_feature_extractor)``.

    Defined outside the ``try`` block so the accessor exists even when
    loading failed; in that case both elements are ``None``.
    """
    return genre_model, genre_feature_extractor
| |
|
| | |
# Load the lyric-generation LLM once at import time. If anything goes wrong,
# both ``llm_tokenizer`` and ``llm_model`` end up as None.
print("Loading Qwen QwQ-32B model with 4-bit quantization...")
try:
    # 4-bit NF4 weights with double quantization; compute dtype is float16.
    quantization_config = BitsAndBytesConfig(
        bnb_4bit_use_double_quant=True,
        bnb_4bit_compute_dtype=torch.float16,
        bnb_4bit_quant_type="nf4",
        load_in_4bit=True,
    )
    llm_tokenizer = AutoTokenizer.from_pretrained(LLM_MODEL_NAME)
    llm_model = AutoModelForCausalLM.from_pretrained(
        LLM_MODEL_NAME,
        torch_dtype=torch.float16,
        device_map="auto",
        quantization_config=quantization_config,
        use_cache=True,
        trust_remote_code=True,
    )
except Exception as load_error:
    print(f"Error loading LLM model: {load_error}")
    llm_tokenizer = llm_model = None
| |
|
| | |
# Shared analyzer instance; process_audio() calls its analyze_music() method.
music_analyzer = MusicAnalyzer()
| |
|
| | |
def _top_two_labels(scores):
    """Return the two highest-scoring keys of *scores* as ``(primary, secondary)``.

    ``secondary`` is ``None`` when *scores* has a single entry. An empty
    mapping raises ``IndexError`` (handled by the caller's error path).
    """
    ranked = sorted(scores.items(), key=lambda item: item[1], reverse=True)
    primary = ranked[0][0]
    secondary = ranked[1][0] if len(ranked) > 1 else None
    return primary, secondary


def _confidence_bullet(role, label, scores):
    """Format one ``- Role: label (Confidence: x%)`` line; ``N/A`` when *label* is None."""
    if label is None:
        return f"- {role}: N/A"
    return f"- {role}: {label} (Confidence: {scores[label]:.1%})"


def _classify_genres(y, sr, top_k=5):
    """Return up to *top_k* ``(genre_label, probability)`` pairs for the audio.

    Falls back to ``[("Unknown", 1.0)]`` when the genre model or its feature
    extractor failed to load.
    """
    if genre_model is None or genre_feature_extractor is None:
        return [("Unknown", 1.0)]

    # The feature extractor is called with 16 kHz audio, so resample first.
    y_16k = librosa.resample(y, orig_sr=sr, target_sr=16000)
    inputs = genre_feature_extractor(
        y_16k,
        sampling_rate=16000,
        return_tensors="pt",
    ).to(genre_model.device)

    with torch.no_grad():
        logits = genre_model(**inputs).logits
        probs = torch.nn.functional.softmax(logits, dim=-1)

    # Never ask topk for more entries than the model has labels.
    values, indices = torch.topk(probs[0], k=min(top_k, probs.shape[-1]))
    return [
        (genre_model.config.id2label[idx.item()], val.item())
        for val, idx in zip(values, indices)
    ]


def process_audio(audio_file, custom_prompt=""):
    """Analyze an audio file and, for supported genres, generate matching lyrics.

    Args:
        audio_file: The audio to analyze, as handed over by the Gradio audio
            input (configured with ``type="filepath"``); ``None`` when no
            audio was provided.
        custom_prompt: Optional free-text requirements forwarded to
            ``generate_lyrics``.

    Returns:
        A 10-tuple ``(analysis_summary, lyrics, tempo, time_signature,
        primary_emotion, secondary_emotion, primary_theme, secondary_theme,
        primary_genre, beat_match_analysis)``. When no file is given or any
        step raises, the first element is a message and the other nine are
        ``None``.
    """
    if audio_file is None:
        return ("No audio file provided",) + (None,) * 9

    try:
        y, sr = load_audio(audio_file, sr=SAMPLE_RATE)
        duration = extract_audio_duration(y, sr)

        time_sig_result = beat_analyzer.detect_time_signature(audio_file)
        time_signature = time_sig_result["time_signature"]

        music_analysis = music_analyzer.analyze_music(audio_file)
        tempo = music_analysis["rhythm_analysis"]["tempo"]

        emotion_scores = music_analysis["emotion_analysis"]["emotion_scores"]
        primary_emotion, secondary_emotion = _top_two_labels(emotion_scores)

        theme_scores = music_analysis["theme_analysis"]["theme_scores"]
        primary_theme, secondary_theme = _top_two_labels(theme_scores)

        top_genres = _classify_genres(y, sr)
        genre_results_text = format_genre_results(top_genres)
        primary_genre = top_genres[0][0]

        # Any other detected signature is replaced by 4/4 before beat analysis.
        if time_signature not in ("4/4", "3/4", "6/8"):
            time_signature = "4/4"

        beat_analysis = beat_analyzer.analyze_beat_pattern(
            audio_file, time_signature=time_signature, auto_detect=False
        )
        lyric_templates = beat_analyzer.create_lyric_template(beat_analysis)

        # generate_lyrics() reads the templates back out of music_analysis.
        music_analysis["beat_analysis"] = beat_analysis
        music_analysis["lyric_templates"] = lyric_templates

        tonal = music_analysis["tonal_analysis"]
        summary_lines = [
            "",
            "### Music Analysis Results",
            "",
            f"**Duration:** {duration:.2f} seconds",
            f"**Tempo:** {tempo:.1f} BPM",
            f"**Time Signature:** {time_signature} (Confidence: {time_sig_result['confidence']:.1%})",
            f"**Key:** {tonal['key']} {tonal['mode']}",
            "",
            "**Emotions:**",
            _confidence_bullet("Primary", primary_emotion, emotion_scores),
            # secondary_* may be None when only one score exists; indexing the
            # score dict with None used to abort the whole analysis.
            _confidence_bullet("Secondary", secondary_emotion, emotion_scores),
            "",
            "**Themes:**",
            _confidence_bullet("Primary", primary_theme, theme_scores),
            _confidence_bullet("Secondary", secondary_theme, theme_scores),
            "",
            f"**Top Genre:** {primary_genre}",
            "",
            f"{genre_results_text}",
            "",
        ]
        analysis_summary = "\n".join(summary_lines)

        if lyric_templates:
            average_beats = np.mean([t['num_beats'] for t in lyric_templates])
            second_pattern = (
                lyric_templates[1]['stress_pattern'] if len(lyric_templates) > 1 else 'N/A'
            )
            analysis_summary += "\n".join([
                "",
                "### Beat Analysis",
                "",
                f"**Total Phrases:** {len(lyric_templates)}",
                f"**Average Beats Per Phrase:** {average_beats:.1f}",
                "**Beat Pattern Examples:**",
                f"- Phrase 1: {lyric_templates[0]['stress_pattern']}",
                f"- Phrase 2: {second_pattern}",
                "",
            ])

        # Lyrics are generated only when a supported genre name occurs in the
        # detected genre label (case-insensitive substring match).
        detected = primary_genre.lower()
        genre_supported = any(
            genre.lower() in detected for genre in beat_analyzer.supported_genres
        )

        if genre_supported:
            lyrics = generate_lyrics(music_analysis, primary_genre, duration, custom_prompt)
            beat_match_analysis = analyze_lyrics_rhythm_match(lyrics, lyric_templates, primary_genre)
        else:
            supported_genres_str = ", ".join(
                genre.capitalize() for genre in beat_analyzer.supported_genres
            )
            lyrics = f"Lyrics generation is only supported for the following genres: {supported_genres_str}.\n\nDetected genre '{primary_genre}' doesn't have strong syllable-to-beat patterns required for our lyric generation algorithm."
            beat_match_analysis = "Lyrics generation not available for this genre."

        return (
            analysis_summary, lyrics, tempo, time_signature,
            primary_emotion, secondary_emotion,
            primary_theme, secondary_theme,
            primary_genre, beat_match_analysis,
        )

    except Exception as e:
        # Top-level UI boundary: report the failure in the summary field
        # instead of letting the exception reach Gradio.
        error_msg = f"Error processing audio: {e}"
        print(error_msg)
        return (error_msg,) + (None,) * 9
| |
|
# Patterns used to clean the raw LLM completion (compiled once at import).
_THINK_BLOCK_RE = re.compile(r'<think>.*?</think>', re.DOTALL | re.IGNORECASE)
_THINK_CLOSE_RE = re.compile(r'</think>', re.IGNORECASE)
_THINK_OPEN_RE = re.compile(r'<think>', re.IGNORECASE)
_LYRICS_SECTION_RE = re.compile(r'LYRICS:\s*\n(.*?)(?:\n\n|\Z)', re.DOTALL | re.IGNORECASE)
_LYRIC_START_RES = [
    re.compile(pattern, re.IGNORECASE)
    for pattern in (
        r'(?:here (?:are )?(?:the )?lyrics?:?|lyrics?:?|my lyrics?:?|song lyrics?:?)\s*',
        r'(?:here (?:is )?(?:a )?song:?|here (?:is )?my song:?)\s*',
        r'(?:\*{3,}|\={3,}|\-{3,})\s*',
        r'(?:final lyrics?:?|the lyrics?:?)\s*',
        r'```\s*',
    )
]
_LEADING_NUMBER_RE = re.compile(r'^\d+\.\s*')
_TRAILING_COMMENT_RE = re.compile(r'\s+//.*$')
_TRAILING_PAREN_RE = re.compile(r'\s+\(.*?\)$')
_SYLLABLE_NOTE_RE = re.compile(r'\s*\(\d+\s*syllables?\)', re.IGNORECASE)
_PLACEHOLDER_RE = re.compile(r'^\[ *(line|moment|breath|phrase|word|sound) *\]$')
_NUMBERED_OR_BRACKETED_RE = re.compile(r'^\d+[\.\):]|^\[.*\]$')

# Substrings that mark a line as model commentary rather than lyrics.
_META_PHRASES = (
    'line 1', 'line 2', 'line 3',
    'thinking', 'lyrics:', 'format:', 'etc...', 'commentary',
    'syllables', 'requirements', 'output', 'provide',
)
_TECH_WORDS = ('syllable', 'beat', 'tempo', 'analysis', 'format', 'section')


def _primary_and_secondary(scores):
    """Return the two highest-scoring keys of *scores*; the second may be None."""
    ranked = sorted(scores.items(), key=lambda item: item[1], reverse=True)
    return ranked[0][0], (ranked[1][0] if len(ranked) > 1 else None)


def _build_lyrics_prompt(genre, key, mode, tempo, emotions, themes,
                         num_lines, min_syl, max_syl, custom_prompt):
    """Assemble the songwriter prompt sent to the LLM."""
    primary_emotion, secondary_emotion = emotions
    primary_theme, secondary_theme = themes

    sections = [
        f"You are a professional songwriter. Write song lyrics for a {genre} song.",
        "",
        "SONG DETAILS:",
        f"- Key: {key} {mode}",
        f"- Tempo: {tempo} BPM",
        f"- Primary emotion: {primary_emotion}",
        f"- Secondary emotion: {secondary_emotion}",
        f"- Primary theme: {primary_theme}",
        f"- Secondary theme: {secondary_theme}",
    ]

    user_request = (custom_prompt or "").strip()
    if user_request:
        sections += [
            "",
            "SPECIAL REQUIREMENTS FROM USER:",
            user_request,
            "Please incorporate these requirements while still following all the technical constraints below.",
        ]

    sections += [
        "",
        "CRITICAL REQUIREMENTS (MOST IMPORTANT):",
        f"- You MUST write EXACTLY {num_lines} lines of lyrics.",
        f"- Number each lyric line starting from 1 up to {num_lines}. For example:",
        "1. First lyric line.",
        "2. Second lyric line.",
        "...",
        f"{num_lines}. The final lyric line.",
        f"- Each numbered line (after removing the number and period) MUST be {min_syl}-{max_syl} syllables MAXIMUM.",
        f"- NO line's content (after removing the number) can exceed {max_syl} syllables. This is EXTREMELY IMPORTANT.",
        "- Count syllables carefully for the content of each numbered line.",
        "- Use SHORT WORDS and SHORT PHRASES for the content of each numbered line.",
        "- Break long thoughts into multiple numbered lines.",
        "",
        "CREATIVITY GUIDELINES:",
        "- Create original, vivid imagery that captures the emotions.",
        "- Use concrete, sensory details (what you see, hear, feel, touch).",
        "- Avoid clichés and common phrases.",
        "- Draw inspiration from the specific themes and emotions listed above.",
        "- Think about unique moments, specific objects, or personal details.",
        "- Use unexpected word combinations.",
        f"- Focus on the particular mood created by {primary_emotion} and {secondary_emotion}.",
        "",
        "STYLE FOR SHORT LINES (for the content of each numbered line):",
        "- Use brief, impactful phrases.",
        "- Focus on single images or moments per line.",
        "- Choose simple, everyday words.",
        "- Let each line paint one clear picture.",
        "",
        "ABSOLUTELY NO placeholders like [line], [moment], [breath], [phrase], [word], etc.",
        "",
        "OUTPUT FORMAT:",
        f"Under the \"LYRICS:\" heading, provide exactly {num_lines} numbered lyric lines.",
        "",
        "LYRICS:",
        f"(Your {num_lines} numbered lyric lines go here, each starting with its number, a period, and a space)",
        "",
        f"Remember: Output EXACTLY {num_lines} numbered lyric lines. Each line's content (after removing the number) must be {min_syl}-{max_syl} syllables.",
    ]
    return "\n".join(sections)


def _strip_reasoning(text):
    """Remove the model's ``<think>`` reasoning from *text*."""
    text = _THINK_BLOCK_RE.sub('', text)
    # The opening tag is often part of the generation prompt, so the decoded
    # completion can contain reasoning followed by a lone closing tag. In that
    # case only what follows the last closing tag is the answer.
    text = _THINK_CLOSE_RE.split(text)[-1]
    return _THINK_OPEN_RE.sub('', text)


def _extract_lyrics_section(text):
    """Return the part of *text* that most likely holds the lyric lines."""
    section = _LYRICS_SECTION_RE.search(text)
    if section:
        return section.group(1).strip()

    # No explicit "LYRICS:" block: skip past the furthest-reaching intro marker.
    start = 0
    for pattern in _LYRIC_START_RES:
        found = pattern.search(text)
        if found:
            start = max(start, found.end())
    return text[start:].strip() if start > 0 else text


def _clean_lyric_lines(text):
    """Split *text* into lines and keep only those that look like lyrics."""
    kept = []
    for raw_line in text.strip().split('\n'):
        line = raw_line.strip()
        if not line:
            continue

        line = _LEADING_NUMBER_RE.sub('', line)
        # Strip trailing annotations *before* the keyword filters; otherwise a
        # good line such as "Rain on glass (5 syllables)" is discarded whole.
        line = _TRAILING_COMMENT_RE.sub('', line)
        line = _TRAILING_PAREN_RE.sub('', line)
        line = _SYLLABLE_NOTE_RE.sub('', line).strip()
        lowered = line.lower()

        if _PLACEHOLDER_RE.match(lowered):
            continue
        if any(phrase in lowered for phrase in _META_PHRASES):
            continue
        if _NUMBERED_OR_BRACKETED_RE.match(line):
            continue

        if 1 <= len(line.split()) <= 8 and not any(word in lowered for word in _TECH_WORDS):
            kept.append(line)
    return kept


def _count_line_syllables(line):
    """Sum ``beat_analyzer.count_syllables`` over the words of *line*."""
    return sum(beat_analyzer.count_syllables(word) for word in line.split())


def _enforce_syllable_range(lines, min_syllables, max_syllables):
    """Keep in-range lines, split over-long ones, drop too-short ones."""
    enforced = []
    for line in lines:
        total = _count_line_syllables(line)
        if min_syllables <= total <= max_syllables:
            enforced.append(line)
        elif total > max_syllables:
            chunk, chunk_total = [], 0
            for word in line.split():
                word_syllables = beat_analyzer.count_syllables(word)
                if chunk and chunk_total + word_syllables > max_syllables:
                    enforced.append(" ".join(chunk))
                    chunk, chunk_total = [word], word_syllables
                else:
                    chunk.append(word)
                    chunk_total += word_syllables
            # The trailing fragment is kept only if it meets the minimum.
            if chunk and chunk_total >= min_syllables:
                enforced.append(" ".join(chunk))
    return enforced


def _fit_line_count(lines, num_required, max_syllables):
    """Reduce a surplus of lines to exactly *num_required*.

    Adjacent lines are merged while their combined syllable count fits
    *max_syllables*; what is still left over is truncated. Merging stops as
    soon as the surplus is used up, so the result never drops below
    *num_required* lines.
    """
    surplus = len(lines) - num_required
    if surplus <= 0:
        return list(lines)

    fitted = []
    i = 0
    while i < len(lines) and len(fitted) < num_required:
        if surplus > 0 and i + 1 < len(lines):
            combined = _count_line_syllables(lines[i]) + _count_line_syllables(lines[i + 1])
            if combined <= max_syllables:
                fitted.append(lines[i] + " " + lines[i + 1])
                i += 2
                surplus -= 1
                continue
        fitted.append(lines[i])
        i += 1
    return fitted


def generate_lyrics(music_analysis, genre, duration, custom_prompt=""):
    """Generate lyrics for *genre* that follow the analysed beat templates.

    Args:
        music_analysis: Analysis dict; reads ``rhythm_analysis.tempo``,
            ``tonal_analysis.key``/``mode``, ``emotion_analysis.emotion_scores``,
            ``theme_analysis.theme_scores`` and the optional ``lyric_templates``.
        genre: Genre name inserted into the prompt.
        duration: Accepted for interface compatibility; not used here.
        custom_prompt: Optional extra user requirements added to the prompt.

    Returns:
        The lyrics as newline-separated lines (one per template, or four when
        there are no templates), or a human-readable message string when
        generation or post-processing fails. Exceptions are not propagated.
    """
    try:
        tempo = music_analysis["rhythm_analysis"]["tempo"]
        key = music_analysis["tonal_analysis"]["key"]
        mode = music_analysis["tonal_analysis"]["mode"]

        emotions = _primary_and_secondary(music_analysis["emotion_analysis"]["emotion_scores"])
        themes = _primary_and_secondary(music_analysis["theme_analysis"]["theme_scores"])

        lyric_templates = music_analysis.get("lyric_templates", [])

        if lyric_templates:
            num_required = len(lyric_templates)
            if lyric_templates[0].get('max_expected'):
                prompt_max = max(t.get('max_expected', 7) for t in lyric_templates)
            else:
                prompt_max = 7
            if lyric_templates[0].get('min_expected'):
                prompt_min = min(t.get('min_expected', 2) for t in lyric_templates)
            else:
                prompt_min = 2
            # NOTE(review): enforcement defaults to 6 syllables while the
            # prompt defaults to 7; kept as-is, confirm which is intended.
            max_allowed_syllables = max(t.get('max_expected', 6) for t in lyric_templates)
            min_allowed_syllables = min(t.get('min_expected', 2) for t in lyric_templates)
        else:
            num_required = 4
            prompt_min, prompt_max = 2, 7
            min_allowed_syllables, max_allowed_syllables = 2, 6

        if llm_model is None or llm_tokenizer is None:
            return "Error generating lyrics: the language model is not loaded."

        prompt = _build_lyrics_prompt(
            genre, key, mode, tempo, emotions, themes,
            num_required, prompt_min, prompt_max, custom_prompt,
        )

        text = llm_tokenizer.apply_chat_template(
            [{"role": "user", "content": prompt}],
            tokenize=False,
            add_generation_prompt=True,
        )
        model_inputs = llm_tokenizer([text], return_tensors="pt").to(llm_model.device)

        generated_ids = llm_model.generate(
            **model_inputs,
            max_new_tokens=2048,
            do_sample=True,
            temperature=0.6,
            top_p=0.95,
            top_k=30,
            repetition_penalty=1.1,
            pad_token_id=llm_tokenizer.eos_token_id,
        )

        # Decode only the newly generated tokens, not the echoed prompt.
        output_ids = generated_ids[0][len(model_inputs.input_ids[0]):].tolist()
        raw_output = llm_tokenizer.decode(output_ids, skip_special_tokens=True).strip()

        lyrics_text = _extract_lyrics_section(_strip_reasoning(raw_output))
        clean_lines = _clean_lyric_lines(lyrics_text)
        clean_lines = _enforce_syllable_range(
            clean_lines, min_allowed_syllables, max_allowed_syllables
        )

        if len(clean_lines) < num_required:
            return f"Error: The model generated {len(clean_lines)} lines but {num_required} were required. Please try again."
        clean_lines = _fit_line_count(clean_lines, num_required, max_allowed_syllables)

        final_lyrics = '\n'.join(clean_lines)
        if not final_lyrics or len(final_lyrics.strip()) < 15:
            return "The model output appears to be mostly thinking content. Please try regenerating for cleaner lyrics."

        return final_lyrics

    except Exception as e:
        # UI boundary: surface the failure as text instead of raising.
        error_msg = f"Error generating lyrics: {e}"
        print(error_msg)
        return error_msg
| |
|
# Table symbols for the per-line syllable verdict. The ideal and miss marks
# must differ, otherwise the "Match" column cannot be read.
_MATCH_IDEAL = "✓"
_MATCH_IN_RANGE = "✓*"
_MATCH_MISS = "✗"

# Rendering of template stress characters: strong, medium, anything else.
_STRESS_MARKS = {"S": "X", "M": "x"}

# Genre-specific advice appended to the report; the key is the lower-cased genre.
_GENRE_FLOW_NOTES = {
    "pop": (
        "- Pop lyrics work well with thoughts spanning 2-3 musical phrases\n"
        "- Create flow by connecting lines with transitions like 'as', 'when', 'through'\n"
    ),
    "rock": (
        "- Rock lyrics benefit from short phrases that build into complete thoughts\n"
        "- Use line breaks strategically to emphasize key words\n"
    ),
    "country": (
        "- Country lyrics tell stories that flow naturally across multiple lines\n"
        "- Connect narrative elements across phrases for authentic storytelling\n"
    ),
    "disco": (
        "- Disco lyrics work well with phrases that create rhythmic momentum\n"
        "- Use line transitions that maintain energy and flow\n"
    ),
    "metal": (
        "- Metal lyrics can create intensity by breaking phrases at dramatic points\n"
        "- Connect lines to build tension and release across measures\n"
    ),
}
_DEFAULT_FLOW_NOTES = (
    "- This genre works well with connected thoughts across multiple lines\n"
    "- Aim for natural speech flow rather than complete thoughts per line\n"
)


def analyze_lyrics_rhythm_match(lyrics, lyric_templates, genre="pop"):
    """Analyze how well the generated lyrics match the beat patterns and syllable requirements.

    Args:
        lyrics: Newline-separated lyric lines; blank lines are ignored.
        lyric_templates: One template dict per phrase. Each needs a
            ``stress_pattern`` string and may carry ``min_expected`` /
            ``max_expected`` syllable bounds.
        genre: Genre name passed to the beat analyzer and used to pick the
            closing advice.

    Returns:
        A Markdown report (table, summary, flow analysis, guidance), or a
        short notice when either input is empty.
    """
    if not lyric_templates or not lyrics:
        return "No beat templates or lyrics available for analysis."

    lines = [line for line in lyrics.strip().split('\n') if line.strip()]

    parts = [
        "### Beat & Syllable Match Analysis\n\n",
        "| Line | Syllables | Target Range | Match | Stress Pattern |\n",
        "| ---- | --------- | ------------ | ----- | -------------- |\n",
    ]

    # Only lines that have a corresponding template are scored.
    line_count = min(len(lines), len(lyric_templates))

    ideal_matches = 0
    range_matches = 0
    stress_matches = 0
    stress_percentage_sum = 0

    for number, (line, template) in enumerate(zip(lines, lyric_templates), start=1):
        check = beat_analyzer.check_syllable_stress_match(line, template, genre)

        if check["close_to_ideal"]:
            syllable_match = _MATCH_IDEAL
            ideal_matches += 1
        elif check["within_range"]:
            syllable_match = _MATCH_IN_RANGE
            range_matches += 1
        else:
            syllable_match = _MATCH_MISS

        if check["stress_matches"]:
            stress_matches += 1
        stress_percentage_sum += check["stress_match_percentage"]

        stress_visual = "".join(_STRESS_MARKS.get(char, ".") for char in template['stress_pattern'])

        parts.append(
            f"| {number} | {check['syllable_count']} | {check['min_expected']}-{check['max_expected']} | {syllable_match} | {stress_visual} |\n"
        )

    if line_count > 0:
        exact_match_rate = (ideal_matches / line_count) * 100
        range_match_rate = ((ideal_matches + range_matches) / line_count) * 100
        stress_match_rate = (stress_matches / line_count) * 100
        avg_stress_percentage = (stress_percentage_sum / line_count) * 100

        parts += [
            "\n**Summary:**\n",
            f"- Ideal or near-ideal syllable match rate: {exact_match_rate:.1f}%\n",
            f"- Genre-appropriate syllable range match rate: {range_match_rate:.1f}%\n",
            f"- Perfect stress pattern match rate: {stress_match_rate:.1f}%\n",
            f"- Average stress pattern accuracy: {avg_stress_percentage:.1f}%\n",
            f"- Overall rhythmic accuracy: {((range_match_rate + avg_stress_percentage) / 2):.1f}%\n",
        ]

    flow = analyze_sentence_flow(lines)
    parts += [
        "\n**Sentence Flow Analysis:**\n",
        f"- Connected thought groups: {flow['connected_groups']} detected\n",
        f"- Average lines per thought: {flow['avg_lines_per_group']:.1f}\n",
        f"- Flow quality: {flow['flow_quality']}\n",
    ]

    min_target = min(t.get('min_expected', 3) for t in lyric_templates)
    max_target = max(t.get('max_expected', 7) for t in lyric_templates)
    parts += [
        "\n**Syllable & Flow Guidance:**\n",
        f"- Aim for {min_target}-{max_target} syllables per line\n",
        "- Break complete thoughts across 2-3 lines for natural flow\n",
        "- Connect your lyrics with sentence fragments that flow across lines\n",
        "- Use conjunctions, prepositions, and dependent clauses to connect lines\n",
    ]

    parts.append(f"\n**Genre Notes ({genre}):**\n")
    parts.append(_GENRE_FLOW_NOTES.get(genre.lower(), _DEFAULT_FLOW_NOTES))

    return "".join(parts)
| |
|
# Words that, at the start of a line, suggest it continues the previous one:
# conjunctions, subordinators, prepositions and relative words.
_CONTINUATION_STARTERS = frozenset([
    'and', 'but', 'or', 'nor', 'for', 'yet', 'so',
    'as', 'when', 'while', 'before', 'after', 'since', 'until', 'because', 'although', 'though',
    'with', 'without', 'through', 'throughout', 'beyond', 'beneath', 'under', 'over', 'into', 'onto',
    'to', 'from', 'by', 'at', 'in', 'on', 'of',
    'where', 'how', 'who', 'whom', 'whose', 'which', 'that',
    'if', 'then',
])


def _continues_previous_line(lines, index):
    """Return True when ``lines[index]`` reads as a continuation of the line before it."""
    words = lines[index].split()
    if not words:
        return False

    first_word = words[0].strip(',.!?;:')
    if first_word.lower() in _CONTINUATION_STARTERS:
        return True

    # A line starting with a lower-case letter is treated as mid-sentence.
    # The check uses the original casing (lower-casing first would make it
    # always true) and tolerates a first token that is only punctuation.
    if first_word and first_word[0].isalpha() and not first_word[0].isupper():
        return True

    # A short line (at most three words) counts as connected when the
    # following line opens with a continuation word.
    if len(words) <= 3 and index < len(lines) - 1:
        next_words = lines[index + 1].lower().split()
        return bool(next_words) and next_words[0] in _CONTINUATION_STARTERS

    return False


def analyze_sentence_flow(lines):
    """Analyze how well the lyrics create sentence flow across multiple lines.

    Consecutive lines are collected into "thought groups": a line joins the
    current group when it looks like a continuation of the previous line,
    otherwise it starts a new group. Only groups of two or more lines count.

    Args:
        lines: Lyric lines in order.

    Returns:
        A dict with ``connected_groups`` (number of multi-line groups),
        ``avg_lines_per_group`` (0 when there are none) and a textual
        ``flow_quality`` rating.
    """
    if not lines or len(lines) < 2:
        return {
            "connected_groups": 0,
            "avg_lines_per_group": 0,
            "flow_quality": "Insufficient lines to analyze"
        }

    groups = []
    current_group = [0]

    for i in range(1, len(lines)):
        if _continues_previous_line(lines, i):
            current_group.append(i)
        else:
            if len(current_group) > 1:
                groups.append(current_group)
            current_group = [i]

    if len(current_group) > 1:
        groups.append(current_group)

    connected_groups = len(groups)

    if connected_groups == 0:
        avg_lines_per_group = 0
        flow_quality = "Poor - no connected thoughts detected"
    else:
        avg_lines_per_group = sum(len(group) for group in groups) / connected_groups
        if connected_groups >= len(lines) / 3 and avg_lines_per_group >= 2.5:
            flow_quality = "Excellent - multiple connected thoughts across lines"
        elif connected_groups >= len(lines) / 4 and avg_lines_per_group >= 2:
            flow_quality = "Good - some connected thoughts across lines"
        else:
            flow_quality = "Fair - limited connection between lines"

    return {
        "connected_groups": connected_groups,
        "avg_lines_per_group": avg_lines_per_group,
        "flow_quality": flow_quality
    }
| |
|
def enforce_syllable_limits(lines, max_syllables=6):
    """
    Break up lines whose syllable total is above ``max_syllables``.

    Lines at or under the limit are passed through untouched and lines with
    no words are skipped. A longer line is cut greedily at word boundaries:
    words are collected until adding the next one would go over the limit,
    at which point the collected words are emitted and a new chunk begins.
    Returns the resulting list of lines.
    """
    limited = []
    if not lines:
        return limited

    for text in lines:
        tokens = text.split()
        if not tokens:
            continue

        per_word = [beat_analyzer.count_syllables(token) for token in tokens]
        if sum(per_word) <= max_syllables:
            limited.append(text)
            continue

        chunk = []
        chunk_total = 0
        for token, amount in zip(tokens, per_word):
            fits = chunk_total + amount <= max_syllables
            if fits or not chunk:
                # An empty chunk always takes the word, even an over-long one.
                chunk.append(token)
                chunk_total += amount
            else:
                limited.append(" ".join(chunk))
                chunk = [token]
                chunk_total = amount

        if chunk:
            limited.append(" ".join(chunk))

    return limited
| |
|
| | |
# Help text shown below the controls. ``{supported_genres}`` is filled in by
# create_interface(). The emoji were restored from mis-encoded text; where the
# original character could not be recovered a fitting one was chosen.
_HOW_IT_WORKS_TEMPLATE = """
## 🚀 How It Works

1. **🎧 Upload Audio**: Support for various formats (MP3, WAV, etc.) or record directly in your browser
2. **🎨 Add Custom Requirements** (Optional): Specify your creative vision, themes, or style preferences
3. **🔍 Advanced Analysis**: Multi-layered analysis including:
   - **Tempo & Time Signature**: Advanced detection using multiple algorithms
   - **Emotional Profiling**: 8-dimensional emotion mapping (happy, sad, excited, calm, etc.)
   - **Thematic Analysis**: Musical themes (love, triumph, adventure, reflection, etc.)
   - **Beat Pattern Extraction**: Precise rhythm and stress pattern identification
   - **Genre Classification**: AI-powered genre detection with confidence scores
4. **🎤 Lyrics Generation**: AI creates perfectly synchronized lyrics that:
   - **Match Beat Patterns**: Each line aligns with musical phrases and rhythm
   - **Follow Syllable Constraints**: Precise syllable-to-beat mapping for natural flow
   - **Incorporate Emotions & Themes**: Blend detected musical characteristics
   - **Include Your Requirements**: Merge your creative directions seamlessly
5. **📊 Quality Analysis**: Comprehensive metrics showing beat matching accuracy and flow quality

## 🎨 Custom Requirements Examples

**🎭 Themes**: "Write about nature and freedom", "Focus on urban nightlife", "Tell a story about friendship"

**🖼️ Imagery**: "Use ocean metaphors", "Include references to stars and sky", "Focus on light and shadow"

**👁️ Perspective**: "From a child's viewpoint", "Make it nostalgic", "Focus on hope and resilience"

**✍️ Style**: "Use simple everyday language", "Include some rhyming", "Make it conversational"

**📝 Content**: "Avoid sad themes", "Include words 'journey' and 'home'", "Focus on personal growth"

The system intelligently blends your requirements with detected musical characteristics to create personalized, rhythm-perfect lyrics.

## 🎵 Supported Genres for Full Lyrics Generation

**✅ Full Support** (Complete Analysis + Beat-Matched Lyrics):
{supported_genres}

These genres have consistent syllable-to-beat patterns that work optimally with our advanced rhythm-matching algorithm.

**📊 Analysis Only**: All other genres receive comprehensive musical analysis (tempo, emotion, themes, etc.) without lyrics generation.

## 🛠️ Advanced Features

- **🎯 Beat Synchronization**: Syllable-perfect alignment with musical phrases
- **🧠 Emotion Integration**: Lyrics reflect detected emotional characteristics
- **🎭 Theme Incorporation**: Musical themes guide lyrical content
- **📈 Quality Metrics**: Detailed analysis of rhythm matching accuracy
- **🌊 Flow Optimization**: Natural sentence continuation across lines
- **⚙️ Genre Optimization**: Tailored patterns for different musical styles
"""


def create_interface():
    """Build and return the Gradio ``Blocks`` UI.

    The left column holds the audio input, the optional custom-requirements
    box and the analyze button. The right column has three tabs: analysis
    results, generated lyrics and the beat-matching report. Clicking the
    button runs ``process_audio`` and spreads its ten return values over the
    output components in the order listed in ``outputs``.
    """
    with gr.Blocks(title="Advanced Music Analysis & Beat-Matched Lyrics Generator") as demo:
        gr.Markdown("# 🎵 Advanced Music Analysis & Beat-Matched Lyrics Generator")
        gr.Markdown("**Upload music to get comprehensive analysis and generate perfectly synchronized lyrics that match the rhythm, emotion, and structure of your audio**")

        with gr.Row():
            with gr.Column(scale=1):
                audio_input = gr.Audio(
                    label="🎧 Upload or Record Audio",
                    type="filepath",
                    sources=["upload", "microphone"]
                )

                custom_prompt_input = gr.Textbox(
                    label="🎨 Custom Lyrics Requirements (Optional)",
                    placeholder="e.g., 'Write about a rainy day in the city' or 'Include metaphors about flying' or 'Make it about overcoming challenges'",
                    lines=3,
                    info="Add any specific requirements, themes, or creative directions for the lyrics. This will be merged with the music analysis to create personalized lyrics."
                )

                analyze_btn = gr.Button("🚀 Analyze Music & Generate Lyrics", variant="primary", size="lg")

            with gr.Column(scale=2):
                with gr.Tab("📊 Music Analysis"):
                    analysis_output = gr.Textbox(label="Comprehensive Music Analysis Results", lines=10)

                    with gr.Row():
                        tempo_output = gr.Number(label="🥁 Tempo (BPM)")
                        time_sig_output = gr.Textbox(label="⏱️ Time Signature")

                    with gr.Row():
                        primary_emotion_output = gr.Textbox(label="😊 Primary Emotion")
                        secondary_emotion_output = gr.Textbox(label="😌 Secondary Emotion")

                    with gr.Row():
                        primary_theme_output = gr.Textbox(label="🎭 Primary Theme")
                        secondary_theme_output = gr.Textbox(label="🎪 Secondary Theme")
                    genre_output = gr.Textbox(label="🎼 Primary Genre")

                with gr.Tab("🎤 Generated Lyrics"):
                    lyrics_output = gr.Textbox(label="Beat-Synchronized Lyrics", lines=20)

                with gr.Tab("🎯 Beat Matching Analysis"):
                    beat_match_output = gr.Markdown(label="Rhythm & Syllable Synchronization Analysis")

        # Output order must match the tuple returned by process_audio().
        analyze_btn.click(
            fn=process_audio,
            inputs=[audio_input, custom_prompt_input],
            outputs=[
                analysis_output, lyrics_output, tempo_output, time_sig_output,
                primary_emotion_output, secondary_emotion_output,
                primary_theme_output, secondary_theme_output,
                genre_output, beat_match_output
            ]
        )

        supported_genres_md = "\n".join(
            f"- **{genre.capitalize()}**: Optimized for {genre} music patterns"
            for genre in beat_analyzer.supported_genres
        )

        gr.Markdown(_HOW_IT_WORKS_TEMPLATE.format(supported_genres=supported_genres_md))

    return demo
| |
|
| | |
# Build the UI once at import time.
demo = create_interface()

if __name__ != "__main__":
    # Imported as a module: expose the interface under the name ``app``.
    app = demo
else:
    # Run as a script: start the Gradio server.
    demo.launch()