adjusted researchcut2segments.py to work with final version of speech annotation #7

Closed
chrhaeusler wants to merge 27 commits from master into master
227 changed files with 958 additions and 890 deletions

0
.datalad/.gitattributes vendored Normal file → Executable file
View file

0
.datalad/config Normal file → Executable file
View file

0
.gitattributes vendored Normal file → Executable file
View file

3
.gitmodules vendored Normal file → Executable file
View file

@ -13,3 +13,6 @@
[submodule "src/bodycontact"]
path = src/bodycontact
url = https://github.com/psychoinformatics-de/studyforrest-paper-bodycontactannotation.git
[submodule "src/voice"]
path = src/voice
url = https://github.com/chrhaeusler/studyforrest-paper-speechannotation

0
LICENSE Normal file → Executable file
View file

0
README.md Normal file → Executable file
View file

View file

@ -0,0 +1,29 @@
#!/usr/bin/python
'''
'''
from os.path import join as opj
import pandas as pd
# read the word-by-word annotation of the audio description
# TODO: skip the rows flagged with '???' already while reading?
df = pd.read_csv(opj('src', 'voice', 'data', 'audio-description-by-word.csv'))
# the first two columns hold start and end of each row; address them by
# label so that the assignments below replace whole columns
start_col, end_col = df.columns[:2]
# drop rows with whole sentences as flagged with '???' in the second column;
# copy, so the following assignments operate on an independent frame
df = df.loc[df[end_col] != '???'].copy()
# convert the cleaned timing columns to float64
# NOTE: ``df.iloc[:, 0:2] = ...`` would write into the existing object-dtype
# columns in place (pandas >= 2.0), leaving the dtype ``object`` so that
# ``float_format`` is not applied when writing the file
df[[start_col, end_col]] = df[[start_col, end_col]].astype('float64')
# replace the end column with the duration (end - start)
df[end_col] = df[end_col] - df[start_col]
# apply BIDS standard column names
df.rename(columns=dict(start='onset', end='duration'), inplace=True)
# write a tab-separated file with timing rounded to milliseconds
df.to_csv(
    opj('researchcut', 'audio-description-by-word.tsv'),
    index=False,
    sep='\t',
    float_format='%.3f')

View file

@ -17,5 +17,6 @@ df.insert(1, 'duration', shot_durations)
df.to_csv(
opj('researchcut', 'locations.tsv'),
index=False,
sep='\t',
index=False)
float_format='%.3f')

View file

@ -0,0 +1,48 @@
#!/usr/bin/python
'''
'''
from os.path import join as opj
import pandas as pd
def time_stamp_to_sec(t_stamp='01:50:34:01', frame_duration_ms=40):
    '''Convert a time stamp into a time point in seconds.

    Input:
        t_stamp: time stamp (str) in format HH:MM:SS:Frame
        frame_duration_ms: duration of a single frame in milliseconds
            (defaults to 40, the value that was previously hard-coded)
    Output:
        time point in seconds (float)
    Raises:
        ValueError if the time stamp does not consist of exactly four
        colon-separated integer fields
    '''
    fields = t_stamp.split(':')
    # refuse malformed stamps instead of silently ignoring extra fields
    if len(fields) != 4:
        raise ValueError(
            'expected time stamp in format HH:MM:SS:Frame, got %r'
            % (t_stamp,))
    hours, minutes, secs, frames = (int(field) for field in fields)

    # sum up in integer milliseconds first to avoid accumulating
    # floating point error, then convert to seconds once
    milliseconds = (hours * 60 * 60 * 1000
                    + minutes * 60 * 1000
                    + secs * 1000
                    + frames * frame_duration_ms)

    return milliseconds / 1000.0
# read the annotation
df = pd.read_csv(opj('src', 'voice', 'speech_vocalization.csv'))
# the first two columns hold the start and end time stamps; address them by
# label so that the assignments below replace whole columns
start_col, end_col = df.columns[:2]
# drop rows that contain a #-flag indicating missing timing in either of the
# two columns; ``na=True`` also drops rows with a missing time stamp, just
# like the former ``... == False`` comparison did
for col in (start_col, end_col):
    df = df.loc[~df[col].str.contains('#', na=True)]
# work on an independent frame from here on
df = df.copy()
# convert time stamps (HH:MM:SS:Frame) to seconds
# NOTE: assigning via ``df.iloc[:, 0] = ...`` would write into the existing
# object-dtype column in place (pandas >= 2.0), leaving the dtype ``object``
# so that ``float_format`` is not applied when writing the file
df[start_col] = df[start_col].apply(time_stamp_to_sec)
df[end_col] = df[end_col].apply(time_stamp_to_sec)
# replace the end column with the duration (end - start)
df[end_col] = df[end_col] - df[start_col]
# apply BIDS standard column names
df.rename(columns=dict(start='onset', end='duration'), inplace=True)
# write a tab-separated file with timing rounded to milliseconds
df.to_csv(
    opj('researchcut', 'speech_vocalization.tsv'),
    index=False,
    sep='\t',
    float_format='%.3f')

View file

@ -199,25 +199,14 @@ if __name__ == "__main__":
# correct for the stimulus used to annotate the audiotrack
if annotated_time == 'aomovie':
# the files
# forrestgump_researchcut_ad_ger.flac and
# german_dvd_5.1_48000hz_488kb_research_cut_aligned_cutted_narrator_muted_48000Hz.flac
# (that contain the audio description) were originally lagging
# behind for XYZ msec and were shifted forward
# by one frame (40ms) in respect to the reference file
# forrestgump_researchcut_ger.mkv
# 1st, correct for shifting the narrator (incl. dialogue) 40ms
# to the front before annotating the narrator/dialogue
onset_in_seg += 0.040
# 2nd, correct for the offset between the (unshifted) audio
# description and the audiovisual movie
# -> the offset is varying +/- one frame (40 ms) around 0
# first, correct for the offset between the (unshifted) audio
# description and audiovisual movie
# it turned out the offset is varying +/- one frame (40 ms) around 0
# across the course of the whole stimuli
onset_in_seg -= 0.000
# 3rd, correct for the offset between whole stimulus
# (audiovisual or audio-only) and its segments
# second, correct for the offset between whole stimulus
# (audiovisual or audio-description) and its segments
if target_time == 'avmovie':
onset_in_seg = fix_audio_movie_segments(
AUDIO_AV_OFFSETS,
@ -237,10 +226,8 @@ if __name__ == "__main__":
# all splendid for now
pass
else:
raise ValueError('%s is an unknown annotation', basename(input_file))
row['onset'] = round(onset_in_seg, 3)
row['duration'] = round(row['duration'], 3)
# append the row to the events of its run
run_events[run].append(row)

0
old/audio/music.csv Normal file → Executable file
View file

View file

View file

View file

Can't render this file because it is too large.

View file

Can't render this file because it is too large.

0
old/emotions/audio-only/timeseries/ioats_1s_ao_boy.csv Normal file → Executable file
View file

Can't render this file because it is too large.

View file

Can't render this file because it is too large.

View file

Can't render this file because it is too large.

0
old/emotions/audio-only/timeseries/ioats_1s_ao_dan.csv Normal file → Executable file
View file

Can't render this file because it is too large.

View file

Can't render this file because it is too large.

View file

Can't render this file because it is too large.

View file

Can't render this file because it is too large.

View file

Can't render this file because it is too large.

View file

Can't render this file because it is too large.

View file

Can't render this file because it is too large.

View file

Can't render this file because it is too large.

View file

Can't render this file because it is too large.

View file

Can't render this file because it is too large.

0
old/emotions/audio-only/timeseries/ioats_1s_ao_man.csv Normal file → Executable file
View file

Can't render this file because it is too large.

0
old/emotions/audio-only/timeseries/ioats_1s_ao_men.csv Normal file → Executable file
View file

Can't render this file because it is too large.

View file

Can't render this file because it is too large.

View file

Can't render this file because it is too large.

View file

Can't render this file because it is too large.

View file

Can't render this file because it is too large.

View file

Can't render this file because it is too large.

View file

Can't render this file because it is too large.

View file

Can't render this file because it is too large.

View file

Can't render this file because it is too large.

View file

Can't render this file because it is too large.

View file

Can't render this file because it is too large.

View file

Can't render this file because it is too large.

View file

View file

0
old/emotions/audio-only/timeseries/ioats_2s_ao_boy.csv Normal file → Executable file
View file

View file

View file

0
old/emotions/audio-only/timeseries/ioats_2s_ao_dan.csv Normal file → Executable file
View file

View file

View file

View file

View file

View file

View file

View file

View file

View file

0
old/emotions/audio-only/timeseries/ioats_2s_ao_man.csv Normal file → Executable file
View file

0
old/emotions/audio-only/timeseries/ioats_2s_ao_men.csv Normal file → Executable file
View file

View file

View file

View file

View file

View file

View file

View file

View file

View file

View file

View file

View file

View file

View file

View file

View file

View file

View file

View file

View file

View file

View file

View file

View file

View file

View file

View file

View file

View file

View file

View file

View file

View file

View file

View file

View file

View file

View file

View file

View file

View file

View file

Can't render this file because it is too large.

Some files were not shown because too many files have changed in this diff Show more