"""Calculates Emotion Diarization Error Rate (EDER) which is the sum of Missed Emotion (ME),
False Alarm (FA), and Confusion (CF).
Authors
* Yingzhi Wang 2023
"""
[docs]def EDER(prediction, id, duration, emotion, window_length, stride):
""" Calculates the EDER value
Args:
prediction (list): a list of frame-wise predictions of the utterance
id (str): id of the utterance
duration (float): duration of the utterance
emotion (list of dicts): the ground truth emotion and its duration,
e.g. [{'emo': 'angry', 'start': 1.016, 'end': 6.336}]
window_length (float): the frame length used for frame-wise prediction
stride (float): the frame length used for frame-wise prediction
Returns:
float: the calculted EDER for the utterance
Example
-------
>>> from speechbrain.utils.EDER import EDER
>>> prediction=['n', 'n', 'n', 'a', 'a', 'a']
>>> id="spk1_1"
>>> duration=1.22
>>> emotion=[{'emo': 'angry', 'start': 0.39, 'end': 1.10}]
>>> window_length = 0.2
>>> stride = 0.2
>>> EDER(prediction, id, duration, emotion, window_length, stride)
0.2704918032786885
"""
duration = float(duration) # for recipe tests
lol = []
for i in range(len(prediction)):
start = stride * i
end = start + window_length
lol.append([id, start, end, prediction[i]])
lol = merge_ssegs_same_emotion_adjacent(lol)
if len(lol) != 1:
lol = distribute_overlap(lol)
ref = reference_to_lol(id, duration, emotion)
good_preds = 0
for i in ref:
candidates = [element for element in lol if element[3] == i[3]]
ref_interval = [i[1], i[2]]
for candidate in candidates:
overlap = getOverlap(ref_interval, [candidate[1], candidate[2]])
good_preds += overlap
return 1 - good_preds / duration
[docs]def getOverlap(a, b):
""" get the overlapped length of two intervals
Arguments
---------
a : list
b : list
Returns:
float: overlapped length
Example
-------
>>> from speechbrain.utils.EDER import getOverlap
>>> interval1=[1.2, 3.4]
>>> interval2=[2.3, 4.5]
>>> getOverlap(interval1, interval2)
1.1
"""
return max(0, min(a[1], b[1]) - max(a[0], b[0]))
[docs]def is_overlapped(end1, start2):
"""Returns True if segments are overlapping.
Arguments
---------
end1 : float
End time of the first segment.
start2 : float
Start time of the second segment.
Returns
-------
overlapped : bool
True of segments overlapped else False.
Example
-------
>>> from speechbrain.processing import diarization as diar
>>> diar.is_overlapped(5.5, 3.4)
True
>>> diar.is_overlapped(5.5, 6.4)
False
"""
if start2 > end1:
return False
else:
return True
[docs]def merge_ssegs_same_emotion_adjacent(lol):
"""Merge adjacent sub-segs if they are the same emotion.
Arguments
---------
lol : list of list
Each list contains [utt_id, sseg_start, sseg_end, emo_label].
Returns
-------
new_lol : list of list
new_lol contains adjacent segments merged from the same emotion ID.
Example
-------
>>> from speechbrain.utils.EDER import merge_ssegs_same_emotion_adjacent
>>> lol=[['u1', 0.0, 7.0, 'a'],
... ['u1', 7.0, 9.0, 'a'],
... ['u1', 9.0, 11.0, 'n'],
... ['u1', 11.0, 13.0, 'n'],
... ['u1', 13.0, 15.0, 'n'],
... ['u1', 15.0, 16.0, 'a']]
>>> merge_ssegs_same_emotion_adjacent(lol)
[['u1', 0.0, 9.0, 'a'], ['u1', 9.0, 15.0, 'n'], ['u1', 15.0, 16.0, 'a']]
"""
new_lol = []
# Start from the first sub-seg
sseg = lol[0]
flag = False
for i in range(1, len(lol)):
next_sseg = lol[i]
# IF sub-segments overlap AND has same emotion THEN merge
if is_overlapped(sseg[2], next_sseg[1]) and sseg[3] == next_sseg[3]:
sseg[2] = next_sseg[2] # just update the end time
# This is important. For the last sseg, if it is the same emotion then merge
# Make sure we don't append the last segment once more. Hence, set FLAG=True
if i == len(lol) - 1:
flag = True
new_lol.append(sseg)
else:
new_lol.append(sseg)
sseg = next_sseg
# Add last segment only when it was skipped earlier.
if flag is False:
new_lol.append(lol[-1])
return new_lol
[docs]def reference_to_lol(id, duration, emotion):
"""change reference to a list of list
Arguments
---------
id (str): id of the utterance
duration (float): duration of the utterance
emotion (list of dicts): the ground truth emotion and its duration,
e.g. [{'emo': 'angry', 'start': 1.016, 'end': 6.336}]
Returns
-------
lol : list of list
It has each list structure as [rec_id, sseg_start, sseg_end, spkr_id].
Example
-------
>>> from speechbrain.utils.EDER import reference_to_lol
>>> id="u1"
>>> duration=8.0
>>> emotion=[{'emo': 'angry', 'start': 1.016, 'end': 6.336}]
>>> reference_to_lol(id, duration, emotion)
[['u1', 0, 1.016, 'n'], ['u1', 1.016, 6.336, 'a'], ['u1', 6.336, 8.0, 'n']]
"""
assert (
len(emotion) == 1
), "NotImplementedError: The solution is only implemented for one-emotion utterance for now."
lol = []
start = emotion[0]["start"]
end = emotion[0]["end"]
if start > 0:
lol.append([id, 0, start, "n"])
lol.append([id, start, end, emotion[0]["emo"][0]])
duration = float(duration) # for recipe tests
if end < duration:
lol.append([id, end, duration, "n"])
return lol
[docs]def distribute_overlap(lol):
"""Distributes the overlapped speech equally among the adjacent segments
with different emotions.
Arguments
---------
lol : list of list
It has each list structure as [rec_id, sseg_start, sseg_end, spkr_id].
Returns
-------
new_lol : list of list
It contains the overlapped part equally divided among the adjacent
segments with different emotion IDs.
Example
-------
>>> from speechbrain.processing import diarization as diar
>>> lol = [['r1', 5.5, 9.0, 's1'],
... ['r1', 8.0, 11.0, 's2'],
... ['r1', 11.5, 13.0, 's2'],
... ['r1', 12.0, 15.0, 's1']]
>>> diar.distribute_overlap(lol)
[['r1', 5.5, 8.5, 's1'], ['r1', 8.5, 11.0, 's2'], ['r1', 11.5, 12.5, 's2'], ['r1', 12.5, 15.0, 's1']]
"""
new_lol = []
sseg = lol[0]
# Add first sub-segment here to avoid error at: "if new_lol[-1] != sseg:" when new_lol is empty
# new_lol.append(sseg)
for i in range(1, len(lol)):
next_sseg = lol[i]
# No need to check if they are different emotions.
# Because if segments are overlapped then they always have different emotions.
# This is because similar emotion's adjacent sub-segments are already merged by "merge_ssegs_same_emotion()"
if is_overlapped(sseg[2], next_sseg[1]):
# Get overlap duration.
# Now this overlap will be divided equally between adjacent segments.
overlap = sseg[2] - next_sseg[1]
# Update end time of old seg
sseg[2] = sseg[2] - (overlap / 2.0)
# Update start time of next seg
next_sseg[1] = next_sseg[1] + (overlap / 2.0)
if len(new_lol) == 0:
# For first sub-segment entry
new_lol.append(sseg)
else:
# To avoid duplicate entries
if new_lol[-1] != sseg:
new_lol.append(sseg)
# Current sub-segment is next sub-segment
sseg = next_sseg
else:
# For the first sseg
if len(new_lol) == 0:
new_lol.append(sseg)
else:
# To avoid duplicate entries
if new_lol[-1] != sseg:
new_lol.append(sseg)
# Update the current sub-segment
sseg = next_sseg
# Add the remaining last sub-segment
new_lol.append(next_sseg)
return new_lol