import msaf
import pandas as pd

# The bare option key 'precision' was deprecated in pandas 1.4 and removed
# in pandas 2.0; the full, stable option name is 'display.precision'.
pd.set_option('display.precision', 4)

import numpy as np

import musicntd.scripts.overall_scripts as scr
import musicntd.data_manipulation as dm
This notebook presents the results of the baseline, computed with MSAF.
We restricted the baseline to three algorithms:
# Baseline restricted to three MSAF boundary-detection algorithms.
desired_algos = [
    "cnmf",
    "foote",
    "scluster",
]

# Dataset and annotation locations (RWC Pop corpus).
annotations_type = "MIREX10"
folder = "C:\\Users\\amarmore\\Desktop\\Audio samples\\RWC Pop\\Entire RWC"
annotations_folder = "C:\\Users\\amarmore\\Desktop\\Audio samples\\RWC Pop\\annotations\\{}".format(annotations_type)
Below is the code to compute these scores.
def parse_all_algos(song_path, references_segments, bars):
    """Run every desired MSAF boundary algorithm on one song and score it.

    Each algorithm is evaluated twice: on its raw estimated segments, and
    on those segments after alignment on the downbeats (bars). Scores and
    rates are computed at both the 0.5-second and 3-second tolerance
    windows.

    Returns four parallel lists (one entry per algorithm/variant pair, in
    order: original then aligned for each algorithm):
    scores at 0.5 s, scores at 3 s, rates at 0.5 s, rates at 3 s.
    """
    scores_half = []
    scores_three = []
    rates_half = []
    rates_three = []

    def evaluate(segments):
        # Append scores and rates for one segmentation, at both windows.
        scores_half.append(dm.compute_score_of_segmentation(
            references_segments, segments, window_length=0.5))
        scores_three.append(dm.compute_score_of_segmentation(
            references_segments, segments, window_length=3))
        rates_half.append(dm.compute_rates_of_segmentation(
            references_segments, segments, window_length=0.5))
        rates_three.append(dm.compute_rates_of_segmentation(
            references_segments, segments, window_length=3))

    for algo in desired_algos:
        boundaries, _ = msaf.process(song_path, boundaries_id=algo)
        segments = np.array(dm.frontiers_to_segments(boundaries))
        # First the raw estimate, then the bar-aligned variant.
        evaluate(segments)
        evaluate(dm.align_segments_on_bars(segments, bars))

    return scores_half, scores_three, rates_half, rates_three
# Script which parses all songs of RWC Pop, computes their frontiers with
# every desired algorithm, and accumulates the segmentation scores/rates.
zero_point_five_results = []
three_seconds_results = []
five_rates_results = []
three_rates_results = []

paths = scr.load_RWC_dataset(folder, annotations_type=annotations_type)
persisted_path = "C:\\Users\\amarmore\\Desktop\\data_persisted\\"

for song_and_annotations in paths:
    song_file = song_and_annotations[0]
    annotation_file = song_and_annotations[1]
    print(song_file)

    # Load the reference (ground-truth) segments for this song.
    song_path = folder + "\\" + song_file
    annot_path = "{}\\{}".format(annotations_folder, annotation_file)
    annotations = dm.get_segmentation_from_txt(annot_path, annotations_type)
    references_segments = np.array(annotations)[:, 0:2]

    # Bars are cached on disk to avoid recomputing the beat tracking.
    bars = scr.load_or_save_bars(persisted_path, song_path)

    this_zero, this_three, five_rates, three_rates = parse_all_algos(
        song_path, references_segments, bars)
    zero_point_five_results.append(this_zero)
    three_seconds_results.append(this_three)
    five_rates_results.append(five_rates)
    three_rates_results.append(three_rates)
Finally, we display the baseline scores in a readable summary table.
# Aggregate the per-song scores into a summary table:
# rows = (algorithm, original / bar-aligned), columns = (tolerance, metric).
zerofive = np.array(zero_point_five_results)
three = np.array(three_seconds_results)

# list(...) instead of an element-wise comprehension copy.
all_algos = list(desired_algos)
params = ['Original', 'Aligned on downbeats']

# One row label per (algorithm, variant) pair, in the same order as the
# scores were appended by parse_all_algos.
line = [alg for alg in all_algos for _ in params]
subline = [param for _ in all_algos for param in params]
nested_lines = [np.array(line), np.array(subline)]

col = [np.array(['0.5 seconds'] * 3 + ['3 seconds'] * 3),
       np.array(['Precision', 'Recall', 'F measure'] * 2)]

# Mean of each metric over all songs, at both tolerance windows.
arr = [[np.mean(zerofive[:, i, 0]), np.mean(zerofive[:, i, 1]), np.mean(zerofive[:, i, 2]),
        np.mean(three[:, i, 0]), np.mean(three[:, i, 1]), np.mean(three[:, i, 2])]
       for i in range(len(line))]

pd.DataFrame(np.array(arr), index=nested_lines, columns=col)
[1] Nieto, O., & Jehan, T. (2013, May). Convex non-negative matrix factorization for automatic music structure identification. In 2013 IEEE International Conference on Acoustics, Speech and Signal Processing (pp. 236-240). IEEE.
[2] Foote, J. (2000, July). Automatic audio segmentation using a measure of audio novelty. In 2000 IEEE International Conference on Multimedia and Expo. ICME2000. Proceedings. Latest Advances in the Fast Changing World of Multimedia (Cat. No. 00TH8532) (Vol. 1, pp. 452-455). IEEE.
[3] McFee, B., & Ellis, D. (2014). Analyzing Song Structure with Spectral Clustering. In ISMIR (pp. 405-410).