adjusted researchcut2segments.py to work with final version of speech annotation #7

Closed
chrhaeusler wants to merge 27 commits from master into master
227 changed files with 958 additions and 890 deletions

0
.datalad/.gitattributes vendored Normal file → Executable file
View file

0
.datalad/config Normal file → Executable file
View file

0
.gitattributes vendored Normal file → Executable file
View file

3
.gitmodules vendored Normal file → Executable file
View file

@@ -13,3 +13,6 @@
[submodule "src/bodycontact"] [submodule "src/bodycontact"]
path = src/bodycontact path = src/bodycontact
url = https://github.com/psychoinformatics-de/studyforrest-paper-bodycontactannotation.git url = https://github.com/psychoinformatics-de/studyforrest-paper-bodycontactannotation.git
[submodule "src/voice"]
path = src/voice
url = https://github.com/chrhaeusler/studyforrest-paper-speechannotation

0
LICENSE Normal file → Executable file
View file

0
README.md Normal file → Executable file
View file

View file

@@ -0,0 +1,29 @@
#!/usr/bin/python
'''Convert the word-level audio-description annotation to a BIDS-style
events table (onset, duration) written to researchcut/.

Reads src/voice/data/audio-description-by-word.csv, drops rows flagged
with '???', converts the end column to a duration, and writes the result
as a tab-separated file with 3-decimal precision.
'''
from os.path import join as opj
import pandas as pd

# read the annotation
# TODO: could rows flagged with '???' be skipped via skiprows here already?
df = pd.read_csv(opj('src', 'voice', 'data', 'audio-description-by-word.csv'))
# drop rows with whole sentences as flagged with '???' in column 1
df = df.loc[df.iloc[:, 1] != '???']
# convert the cleaned timing columns (start, end) to float64
df.iloc[:, 0:2] = df.iloc[:, 0:2].astype('float64')
# replace column 1 (end) with duration (end - start)
df.iloc[:, 1] = df.iloc[:, 1] - df.iloc[:, 0]
# apply BIDS standard column names
df.rename(columns=dict(start='onset', end='duration'), inplace=True)
# write the BIDS-style events table with millisecond precision
df.to_csv(
    opj('researchcut', 'audio-description-by-word.tsv'),
    index=False,
    sep='\t',
    float_format='%.3f')

View file

@@ -17,5 +17,6 @@ df.insert(1, 'duration', shot_durations)
df.to_csv( df.to_csv(
opj('researchcut', 'locations.tsv'), opj('researchcut', 'locations.tsv'),
index=False,
sep='\t', sep='\t',
index=False) float_format='%.3f')

View file

@@ -0,0 +1,48 @@
#!/usr/bin/python
'''
'''
from os.path import join as opj
import pandas as pd
def time_stamp_to_sec(t_stamp='01:50:34:01'):
    '''Convert a time stamp to a time point in seconds.

    Input:
        time stamp (str) in format HH:MM:SS:Frame
        (one frame corresponds to 40 ms, i.e. 25 frames per second)
    Output:
        time point in seconds (float)
    '''
    parts = t_stamp.split(':')
    hours, minutes, secs, frames = (int(part) for part in parts[:4])
    # accumulate everything in milliseconds, then scale once to seconds
    total_ms = ((hours * 60 + minutes) * 60 + secs) * 1000 + frames * 40
    return total_ms / 1000.0
# read the annotation
df = pd.read_csv(opj('src', 'voice', 'speech_vocalization.csv'))
# filter for rows that contain an #-flag indicating missing timing
# NOTE: comparing to False (rather than using ~) also drops rows where
# str.contains returned NaN (i.e. the timestamp cell itself was missing)
df = df.loc[df.iloc[:, 0].str.contains('#') == False]
df = df.loc[df.iloc[:, 1].str.contains('#') == False]
# convert time stamps (HH:MM:SS:Frame, 40 ms per frame) to seconds
df.iloc[:, 0] = df.iloc[:, 0].apply(time_stamp_to_sec)
df.iloc[:, 1] = df.iloc[:, 1].apply(time_stamp_to_sec)
# replace column 1 (end) with duration (end - start)
df.iloc[:,1] = df.iloc[:,1] - df.iloc[:,0]
# apply BIDS standard column names
df.rename(columns=dict(start='onset', end='duration'), inplace=True)
# write the BIDS-style events table (onset, duration) with ms precision
df.to_csv(
    opj('researchcut', 'speech_vocalization.tsv'),
    index=False,
    sep='\t',
    float_format='%.3f')

View file

@@ -199,25 +199,14 @@ if __name__ == "__main__":
# correct for the stimulus used to annotate the audiotrack # correct for the stimulus used to annotate the audiotrack
if annotated_time == 'aomovie': if annotated_time == 'aomovie':
# the files # first, correct for the offset between the (unshifted) audio
# forrestgump_researchcut_ad_ger.flac and # description and audiovisual movie
# german_dvd_5.1_48000hz_488kb_research_cut_aligned_cutted_narrator_muted_48000Hz.flac # it turned out the offset is varying +/- one frame (40 ms) around 0
# (that contain the audio description) were originally lagging # across the course of the whole stimuli
# behind for XYZ msec and were shifted forward
# by one frame (40ms) in respect to the reference file
# forrestgump_researchcut_ger.mkv
# 1st, correct for shifting the narrator (incl. dialogue) 40ms
# to the front before annotating the narrator/dialogue
onset_in_seg += 0.040
# 2nd, correct for the offset between the (unshifted) audio
# description and the audiovisual movie
# -> the offset is varying +/- one frame (40 ms) around 0
onset_in_seg -= 0.000 onset_in_seg -= 0.000
# 3rd, correct for the offset between whole stimulus # second, correct for the offset between whole stimulus
# (audiovisual or audio-only) and its segments # (audiovisual or audio-description) and its segments
if target_time == 'avmovie': if target_time == 'avmovie':
onset_in_seg = fix_audio_movie_segments( onset_in_seg = fix_audio_movie_segments(
AUDIO_AV_OFFSETS, AUDIO_AV_OFFSETS,
@@ -237,10 +226,8 @@ if __name__ == "__main__":
# all splendid for now # all splendid for now
pass pass
else:
raise ValueError('%s is an unknown annotation', basename(input_file))
row['onset'] = round(onset_in_seg, 3) row['onset'] = round(onset_in_seg, 3)
row['duration'] = round(row['duration'], 3)
# append that shit # append that shit
run_events[run].append(row) run_events[run].append(row)

0
old/audio/music.csv Normal file → Executable file
View file

View file

View file

View file

Can't render this file because it is too large.

View file

Can't render this file because it is too large.

0
old/emotions/audio-only/timeseries/ioats_1s_ao_boy.csv Normal file → Executable file
View file

Can't render this file because it is too large.

View file

Can't render this file because it is too large.

View file

Can't render this file because it is too large.

0
old/emotions/audio-only/timeseries/ioats_1s_ao_dan.csv Normal file → Executable file
View file

Can't render this file because it is too large.

View file

Can't render this file because it is too large.

View file

Can't render this file because it is too large.

View file

Can't render this file because it is too large.

View file

Can't render this file because it is too large.

View file

Can't render this file because it is too large.

View file

Can't render this file because it is too large.

View file

Can't render this file because it is too large.

View file

Can't render this file because it is too large.

View file

Can't render this file because it is too large.

0
old/emotions/audio-only/timeseries/ioats_1s_ao_man.csv Normal file → Executable file
View file

Can't render this file because it is too large.

0
old/emotions/audio-only/timeseries/ioats_1s_ao_men.csv Normal file → Executable file
View file

Can't render this file because it is too large.

View file

Can't render this file because it is too large.

View file

Can't render this file because it is too large.

View file

Can't render this file because it is too large.

View file

Can't render this file because it is too large.

View file

Can't render this file because it is too large.

View file

Can't render this file because it is too large.

View file

Can't render this file because it is too large.

View file

Can't render this file because it is too large.

View file

Can't render this file because it is too large.

View file

Can't render this file because it is too large.

View file

Can't render this file because it is too large.

View file

View file

0
old/emotions/audio-only/timeseries/ioats_2s_ao_boy.csv Normal file → Executable file
View file

View file

View file

0
old/emotions/audio-only/timeseries/ioats_2s_ao_dan.csv Normal file → Executable file
View file

View file

View file

View file

View file

View file

View file

View file

View file

View file

0
old/emotions/audio-only/timeseries/ioats_2s_ao_man.csv Normal file → Executable file
View file

0
old/emotions/audio-only/timeseries/ioats_2s_ao_men.csv Normal file → Executable file
View file

View file

View file

View file

View file

View file

View file

View file

View file

View file

View file

View file

View file

View file

View file

View file

View file

View file

View file

View file

View file

View file

View file

View file

View file

View file

View file

View file

View file

View file

View file

View file

View file

View file

View file

View file

View file

View file

View file

View file

View file

View file

View file

Can't render this file because it is too large.

Some files were not shown because too many files have changed in this diff Show more