6 changed files with 952 additions and 870 deletions
--- a/.gitmodules
+++ b/.gitmodules
@ -13,3 +13,6 @@
 [submodule "src/bodycontact"]
 	path = src/bodycontact
 	url = https://github.com/psychoinformatics-de/studyforrest-paper-bodycontactannotation.git
 [submodule "src/voice"]
 	path = src/voice
 	url = /home/chris/psyinfo/forrest_gump/anno_speech
--- a/code/importer/locations.py
+++ b/code/importer/locations.py
@ -17,5 +17,6 @@ df.insert(1, 'duration', shot_durations)
 df.to_csv(
    opj('researchcut', 'locations.tsv'),
    index=False,
    sep='\t',
-    index=False)
+    float_format='%.3f')
--- a/code/importer/speech_narrator.py
+++ b/code/importer/speech_narrator.py
@ -0,0 +1,29 @@
 #!/usr/bin/python
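 '''Convert the narrator speech annotation into a tab-separated events file.

 Reads src/voice/speech_narrator.csv, drops the rows flagged with '???',
 replaces the end time in the second column with a duration (end - start),
 renames the columns 'start' and 'end' to 'onset' and 'duration', and
 writes the result to researchcut/speech_narrator.tsv.
 '''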
 from os.path import join as opj
 import pandas as pd
 # read the annotation
 # TODO: skip the rows flagged with '???' while reading already?
 df = pd.read_csv(opj('src', 'voice', 'speech_narrator.csv'))
 # the first two columns hold the start and the end of each annotated event
 start_col, end_col = df.columns[:2]
 # drop rows with whole sentences as flagged with '???' in column 1; take a
 # copy so the column assignments below act on an independent frame
 df = df.loc[df[end_col] != '???'].copy()
 # convert the cleaned columns to float64; the columns are replaced as a
 # whole (by label) instead of being written through .iloc, because .iloc
 # assignment sets values in place and can leave the columns as object
 # dtype, in which case float_format below would not be applied
 df[start_col] = df[start_col].astype('float64')
 # replace column 1 (end) with duration (end - start)
 df[end_col] = df[end_col].astype('float64') - df[start_col]
 # apply BIDS standard column names
 df.rename(columns={'start': 'onset', 'end': 'duration'}, inplace=True)
 df.to_csv(
    opj('researchcut', 'speech_narrator.tsv'),
    index=False,
    sep='\t',
    float_format='%.3f')
--- a/code/importer/speech_vocalization.py
+++ b/code/importer/speech_vocalization.py
@ -0,0 +1,48 @@
 #!/usr/bin/python
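 '''Convert the vocalization speech annotation into a tab-separated events file.

 Reads src/voice/speech_vocalization.csv, drops the rows whose start or end
 time stamp contains a '#' flag (missing timing), converts the
 HH:MM:SS:Frame time stamps to seconds, replaces the end time with a
 duration (end - start), renames the columns 'start' and 'end' to 'onset'
 and 'duration', and writes the result to
 researchcut/speech_vocalization.tsv.
 '''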
 from os.path import join as opj
 import pandas as pd
 def time_stamp_to_sec(t_stamp='01:50:34:01', fps=25):
    '''Convert a frame-based time stamp into seconds.

    Input:
        t_stamp: time stamp (str) in format HH:MM:SS:Frame
        fps: number of frames per second used to convert the frame count;
            the default of 25 corresponds to 40 ms per frame
    Output:
        time point in seconds (float)
    '''
    fields = t_stamp.split(':')
    # only the first four fields are used; a stamp with fewer fields raises
    # IndexError and a non-integer field raises ValueError
    hours, minutes, secs, frames = (int(fields[pos]) for pos in range(4))
    # sum up in milliseconds first, so whole seconds stay exact integers
    whole_ms = (hours * 60 * 60 + minutes * 60 + secs) * 1000
    frame_ms = frames * 1000.0 / fps
    return (whole_ms + frame_ms) / 1000.0
 # read the annotation
 df = pd.read_csv(opj('src', 'voice', 'speech_vocalization.csv'))
 # the first two columns hold the start and the end time stamp of each event
 start_col, end_col = df.columns[:2]
 # filter out rows that contain an #-flag indicating missing timing; rows
 # with a missing value in either column are dropped as well (na=True)
 flagged = (
    df[start_col].str.contains('#', regex=False, na=True)
    | df[end_col].str.contains('#', regex=False, na=True))
 # take a copy so the column assignments below act on an independent frame
 df = df.loc[~flagged].copy()
 # convert time stamps to seconds; the columns are replaced as a whole (by
 # label) instead of being written through .iloc, because .iloc assignment
 # sets values in place and can leave the columns as object dtype, in which
 # case float_format below would not be applied
 df[start_col] = df[start_col].apply(time_stamp_to_sec).astype('float64')
 end_sec = df[end_col].apply(time_stamp_to_sec).astype('float64')
 # replace column 1 (end) with duration (end - start)
 df[end_col] = end_sec - df[start_col]
 # apply BIDS standard column names
 df.rename(columns={'start': 'onset', 'end': 'duration'}, inplace=True)
 df.to_csv(
    opj('researchcut', 'speech_vocalization.tsv'),
    index=False,
    sep='\t',
    float_format='%.3f')
--- a/researchcut/locations.tsv
+++ b/researchcut/locations.tsv
--- a/src/voice
+++ b/src/voice
@ -0,0 +1 @@
 Subproject commit 82ae3a633d82d5c467dc7678b44df09f5283e4ef
		`@ -0,0 +1 @@`
							`Subproject commit 82ae3a633d82d5c467dc7678b44df09f5283e4ef`