Modernize #18

Merged
mih merged 9 commits from modernize into master 2021-07-11 15:39:28 +00:00
5 changed files with 166 additions and 160 deletions

View file

@ -1,18 +1,37 @@
# use `chronic` to make output look neater, if available
CHRONIC=$(shell which chronic || echo '' )
PYTHON=python
all: main.pdf all: main.pdf
main.pdf: main.tex references.bib results_def.tex figures # important to process stats and figures first, such that
latexmk -pdf -g $< # up-to-date versions are compiled into the manuscript
main.pdf: main.tex results_def.tex references.bib
results_def.tex: code/mk_figuresnstats.py @echo "# Render figures"
bash -c 'set -o pipefail; code/mk_figuresnstats.py -s | tee results_def.tex'
figures: figures-stamp
figures-stamp: code/mk_figuresnstats.py
code/mk_figuresnstats.py -f -r -m
$(MAKE) -C img $(MAKE) -C img
touch $@ @echo "# Render manuscript"
@$(CHRONIC) latexmk -pdf -g $<
# the stats-script outputs all scores and figures
results_def.tex: code/mk_figuresnstats.py
@test -z "$$VIRTUAL_ENV" && \
echo "ERROR: must be executed in a virtual env (set VIRTUAL_ENV to fake one)" && \
exit 1 || true
@echo "# Ensure REMODNAV installation"
@python -m pip install pandas seaborn sklearn datalad
@datalad get -n remodnav
@$(CHRONIC) pip install -e remodnav
@rm -f $@
@REMODNAV_RESULTS=$@ $(PYTHON) code/mk_figuresnstats.py -s -f -r -m
clean: clean:
rm -f main.bbl main.aux main.blg main.log main.out main.pdf main.tdo main.fls main.fdb_latexmk example.eps img/*eps-converted-to.pdf texput.log results_def.tex figures-stamp rm -f main.bbl main.aux main.blg main.log main.out main.pdf main.tdo \
main.fls main.fdb_latexmk texput.log \
results_def.tex
$(MAKE) -C img clean $(MAKE) -C img clean
virtualenv:
.PHONY: clean

View file

@ -6,35 +6,28 @@ This repository contains the raw data, the code to generate summary statistics,
To recompute results and compile the paper, do the following: To recompute results and compile the paper, do the following:
[Optional] create a virtual environment: - Create a [virtual environment](https://docs.python.org/3/tutorial/venv.html) and activate it:
# create and enter a new virtual environment (optional) ```
# one way to create a virtual environment:
virtualenv --python=python3 ~/env/remodnav virtualenv --python=python3 ~/env/remodnav
. ~/env/remodnav/bin/activate . ~/env/remodnav/bin/activate
```
- if you haven't yet, install [``remodnav``](https://github.com/psychoinformatics-de/remodnav), ``seaborn``, and - ``clone`` the repository with ``git clone https://github.com/psychoinformatics-de/paper-remodnav.git``
[``datalad``](https://www.datalad.org). Depending on your operating system, datalad can be installed via - Navigate into the repository and run ``make``
``pip install datalad`` or ``sudo apt-get install datalad`` (please check the
[docs](http://docs.datalad.org/en/latest/gettingstarted.html) if you are unsure which option is applicable to your system)
Install from [PyPi](https://pypi.org/project/remodnav): Appropriate Makefiles within the directory will install necessary Python requirements (the ``remodnav`` Python package, ``datalad``, ``pandas``, ``seaborn``, and ``sklearn``), execute data retrieval via [DataLad](http://datalad.org) (about 550MB in total),
# install from PyPi
pip install remodnav seaborn sklearn
# if not installed with another method
pip install datalad
- ``datalad clone`` the repository with ``datalad clone https://github.com/psychoinformatics-de/paper-remodnav.git``
- Appropriate Makefiles within the directory will execute data retrieval via datalad (about 550MB in total),
compute the results and figures from ``code/mk_figuresnstats.py``, insert the results and rendered figures in the compute the results and figures from ``code/mk_figuresnstats.py``, insert the results and rendered figures in the
main.tex file, and render the PDF with a single call from the root of the directory: ``make`` main.tex file, and render the PDF.
- Note that [inkscape](https://inkscape.org/de/release/inkscape-0.92.4/), [latexmk](https://mg.readthedocs.io/latexmk.html),
and [texlive-latex-extra](https://wiki.ubuntuusers.de/TeX_Live/) need to be installed on your system to render the figures and the PDF.
The full PDF will be ``main.pdf``. The full PDF will be ``main.pdf``.
## Software requirements
Note that [inkscape](https://inkscape.org/de/release/inkscape-0.92.4/), [latexmk](https://mg.readthedocs.io/latexmk.html),
and [texlive-latex-extra](https://wiki.ubuntuusers.de/TeX_Live/) need to be installed on your system to render the figures and the PDF.
## Getting help
If you encounter failures, e.g. due to uninstalled python modules, restart ``make`` after running ``make clean``. If you encounter failures, e.g. due to uninstalled python modules, restart ``make`` after running ``make clean``.
If you encounter failures you suspect are due to deficiencies in this repository, please submit an If you encounter failures you suspect are due to deficiencies in this repository, please submit an

View file

@ -1,22 +1,38 @@
#!/usr/bin/env python #!/usr/bin/env python
import os.path as op import argparse
import itertools from collections import OrderedDict
import numpy as np from copy import deepcopy
import pylab as pl
import seaborn as sns
from remodnav import EyegazeClassifier
from glob import glob from glob import glob
import datalad.api as dl import itertools
import os.path as op
from os import environ
import sys
import matplotlib.pyplot as plt
from matplotlib.lines import Line2D
import numpy as np
import pandas as pd
from scipy.signal import (
butter,
filtfilt,
)
from scipy.io import loadmat
from scipy.spatial import distance
import seaborn as sns
from sklearn.metrics import cohen_kappa_score
from datalad.api import get as datalad_get
from remodnav import (
EyegazeClassifier,
clf as CLF,
)
from remodnav.tests import utils as ut
#from remodnav.tests.test_labeled import load_data as load_anderson #from remodnav.tests.test_labeled import load_data as load_anderson
def load_anderson(category, name): def load_anderson(category, name):
from scipy.io import loadmat
from datalad.api import get
from remodnav import clf as CLF
import os.path as op
fname = op.join(*( fname = op.join(*(
('remodnav', 'remodnav', 'tests', 'data', 'anderson_etal', ('remodnav', 'remodnav', 'tests', 'data', 'anderson_etal',
'annotated_data') + \ 'annotated_data') + \
@ -26,7 +42,7 @@ def load_anderson(category, name):
) + \ ) + \
(name + ('' if name.endswith('.mat') else '.mat'),)) (name + ('' if name.endswith('.mat') else '.mat'),))
) )
get(fname) datalad_get(fname)
m = loadmat(fname) m = loadmat(fname)
# viewing distance # viewing distance
vdist = m['ETdata']['viewDist'][0][0][0][0] vdist = m['ETdata']['viewDist'][0][0][0][0]
@ -258,7 +274,7 @@ def confusion(refcoder,
# initialize a maximum misclassification rate, to later automatically reference, # initialize a maximum misclassification rate, to later automatically reference,
max_mclf = 0 max_mclf = 0
# coders are in axis labels too # coders are in axis labels too
#pl.suptitle('Jaccard index for movement class labeling {} vs. {}'.format( #plt.suptitle('Jaccard index for movement class labeling {} vs. {}'.format(
# refcoder, coder)) # refcoder, coder))
for stimtype, stimlabel in ( for stimtype, stimlabel in (
('img', 'Images'), ('img', 'Images'),
@ -298,11 +314,11 @@ def confusion(refcoder,
nlabels = [len(l) for l in labels] nlabels = [len(l) for l in labels]
if len(np.unique(nlabels)) > 1: if len(np.unique(nlabels)) > 1:
print( rsout(
"% #\n% # %INCONSISTENCY Found label length mismatch " "% #\n% # %INCONSISTENCY Found label length mismatch "
"between coders ({}, {}) for: {}\n% #\n".format( "between coders ({}, {}) for: {}\n% #\n".format(
refcoder, coder, fname)) refcoder, coder, fname))
print('% Truncate labels to shorter sample: {}'.format( rsout('% Truncate labels to shorter sample: {}'.format(
nlabels)) nlabels))
order_idx = np.array(nlabels).argsort() order_idx = np.array(nlabels).argsort()
labels[order_idx[1]] = \ labels[order_idx[1]] = \
@ -329,7 +345,7 @@ def confusion(refcoder,
# zero out diagonal for bandwidth # zero out diagonal for bandwidth
conf *= ((np.eye(len(conditions)) - 1) * -1) conf *= ((np.eye(len(conditions)) - 1) * -1)
if figures: if figures:
pl.subplot(1, 3, plotter) plt.subplot(1, 3, plotter)
sns.heatmap( sns.heatmap(
#(conf / nsamples) * 100, #(conf / nsamples) * 100,
jinter / junion, jinter / junion,
@ -341,14 +357,14 @@ def confusion(refcoder,
vmin=0.0, vmin=0.0,
vmax=1.0, vmax=1.0,
) )
pl.xlabel('{} labeling'.format(refcoder)) plt.xlabel('{} labeling'.format(refcoder))
pl.ylabel('{} labeling'.format(coder)) plt.ylabel('{} labeling'.format(coder))
# stats are given proper below # stats are given proper below
#pl.title('"{}" (glob. misclf-rate): {:.1f}% (w/o pursuit: {:.1f}%)'.format( #plt.title('"{}" (glob. misclf-rate): {:.1f}% (w/o pursuit: {:.1f}%)'.format(
# stimtype, # stimtype,
# (np.sum(conf) / nsamples) * 100, # (np.sum(conf) / nsamples) * 100,
# (np.sum(conf[:3, :3]) / nsamples_nopurs) * 100)) # (np.sum(conf[:3, :3]) / nsamples_nopurs) * 100))
pl.title(stimlabel) plt.title(stimlabel)
plotter += 1 plotter += 1
msclf_refcoder = dict(zip(conditions, conf.sum(axis=1)/conf.sum() * 100)) msclf_refcoder = dict(zip(conditions, conf.sum(axis=1)/conf.sum() * 100))
msclf_coder = dict(zip(conditions, conf.sum(axis=0)/conf.sum() * 100)) msclf_coder = dict(zip(conditions, conf.sum(axis=0)/conf.sum() * 100))
@ -368,7 +384,7 @@ def confusion(refcoder,
('SACcod', '%.0f', msclf_coder['SAC']), ('SACcod', '%.0f', msclf_coder['SAC']),
('PSOcod', '%.0f', msclf_coder['PSO']), ('PSOcod', '%.0f', msclf_coder['PSO']),
('SPcod', '%.0f', msclf_coder['PUR'])): ('SPcod', '%.0f', msclf_coder['PUR'])):
print('\\newcommand{\\%s%s}{%s}' rsout('\\newcommand{\\%s%s}{%s}'
% (label_prefix, key, format % value)) % (label_prefix, key, format % value))
# update classification performance only if there sth worse # update classification performance only if there sth worse
if (np.sum(conf[:3, :3]) / nsamples_nopurs * 100) > max_mclf: if (np.sum(conf[:3, :3]) / nsamples_nopurs * 100) > max_mclf:
@ -376,10 +392,10 @@ def confusion(refcoder,
# print original outputs, but make them LaTeX-safe with '%'. This # print original outputs, but make them LaTeX-safe with '%'. This
# should make it easier to check correct placements of stats in the # should make it easier to check correct placements of stats in the
# table # table
print('% ### {}'.format(stimtype)) rsout('% ### {}'.format(stimtype))
print('% Comparison | MCLF | MCLFw/oP | Method | Fix | Sacc | PSO | SP') rsout('% Comparison | MCLF | MCLFw/oP | Method | Fix | Sacc | PSO | SP')
print('% --- | --- | --- | --- | --- | --- | --- | ---') rsout('% --- | --- | --- | --- | --- | --- | --- | ---')
print('% {} v {} | {:.1f} | {:.1f} | {} | {:.0f} | {:.0f} | {:.0f} | {:.0f}'.format( rsout('% {} v {} | {:.1f} | {:.1f} | {} | {:.0f} | {:.0f} | {:.0f} | {:.0f}'.format(
refcoder, refcoder,
coder, coder,
(np.sum(conf) / nsamples) * 100, (np.sum(conf) / nsamples) * 100,
@ -390,7 +406,7 @@ def confusion(refcoder,
msclf_refcoder['PSO'], msclf_refcoder['PSO'],
msclf_refcoder['PUR'], msclf_refcoder['PUR'],
)) ))
print('% -- | -- | -- | {} | {:.0f} | {:.0f} | {:.0f} | {:.0f}'.format( rsout('% -- | -- | -- | {} | {:.0f} | {:.0f} | {:.0f} | {:.0f}'.format(
coder, coder,
msclf_coder['FIX'], msclf_coder['FIX'],
msclf_coder['SAC'], msclf_coder['SAC'],
@ -400,14 +416,13 @@ def confusion(refcoder,
return max_mclf return max_mclf
def savefigs(fig, def mk_confusion_figures(fig, stat):
stat):
""" """
small helper function to save all confusion matrices small helper function to save all confusion matrices
""" """
max_mclf = 0 max_mclf = 0
for pair in itertools.combinations(['MN', 'RA', 'AL'], 2): for pair in itertools.combinations(['MN', 'RA', 'AL'], 2):
pl.figure( plt.figure(
# fake size to get the font size down in relation # fake size to get the font size down in relation
figsize=(14, 3), figsize=(14, 3),
dpi=120, dpi=120,
@ -416,15 +431,15 @@ def savefigs(fig,
pair[1], pair[1],
fig, fig,
stat) stat)
pl.savefig( plt.savefig(
op.join('img', 'confusion_{}_{}.svg'.format(*pair)), op.join('img', 'confusion_{}_{}.svg'.format(*pair)),
transparent=True, transparent=True,
bbox_inches="tight") bbox_inches="tight")
pl.close() plt.close()
if cur_max_mclf > max_mclf: if cur_max_mclf > max_mclf:
max_mclf = cur_max_mclf max_mclf = cur_max_mclf
if stat: if stat:
print('\\newcommand{\\maxmclf}{%s}' rsout('\\newcommand{\\maxmclf}{%s}'
% ('%.1f' % max_mclf)) % ('%.1f' % max_mclf))
@ -441,8 +456,6 @@ def quality_stats():
list of command invocations if the script is ran from the command line at the list of command invocations if the script is ran from the command line at the
end of the script. end of the script.
""" """
import matplotlib.pyplot as plt
datapath_mri = op.join('data', 'raw_eyegaze', 'sub-*', 'ses-movie', 'func', datapath_mri = op.join('data', 'raw_eyegaze', 'sub-*', 'ses-movie', 'func',
'sub-*_ses-movie_task-movie_run-*_recording-eyegaze_physio.tsv.gz') 'sub-*_ses-movie_task-movie_run-*_recording-eyegaze_physio.tsv.gz')
datapath_lab = op.join('data', 'raw_eyegaze', 'sub-*', 'beh', datapath_lab = op.join('data', 'raw_eyegaze', 'sub-*', 'beh',
@ -452,7 +465,7 @@ def quality_stats():
(datapath_mri, 'mri')]: (datapath_mri, 'mri')]:
infiles = glob(data) infiles = glob(data)
for f in infiles: for f in infiles:
dl.get(f) datalad_get(f)
# make sure we have 15 subjects' data # make sure we have 15 subjects' data
assert len(infiles) == 120 assert len(infiles) == 120
print("Currently processing data from {} sample".format(assoc)) print("Currently processing data from {} sample".format(assoc))
@ -478,7 +491,7 @@ def quality_stats():
loss = np.nanmean(losses) loss = np.nanmean(losses)
# print results as Latex command using 'assoc' as sample identifier in name # print results as Latex command using 'assoc' as sample identifier in name
label_loss = 'avgloss{}'.format(assoc) label_loss = 'avgloss{}'.format(assoc)
print('\\newcommand{\\%s}{%s}' rsout('\\newcommand{\\%s}{%s}'
% (label_loss, loss)) % (label_loss, loss))
# vels is a list of arrays atm # vels is a list of arrays atm
v = np.concatenate(vels).ravel() v = np.concatenate(vels).ravel()
@ -586,9 +599,6 @@ def S2SRMS():
https://doi.org/10.1038/sdata.2016.92 https://doi.org/10.1038/sdata.2016.92
""" """
from scipy.spatial import distance
import datalad.api as dl
# globs for all data for MRI and Lab # globs for all data for MRI and Lab
datapath_mri = op.join('data', 'raw_eyegaze', 'sub-*', 'ses-movie', 'func', datapath_mri = op.join('data', 'raw_eyegaze', 'sub-*', 'ses-movie', 'func',
'sub-*_ses-movie_task-movie_run-*_recording-eyegaze_physio.tsv.gz') 'sub-*_ses-movie_task-movie_run-*_recording-eyegaze_physio.tsv.gz')
@ -609,7 +619,7 @@ def S2SRMS():
window = 1000 window = 1000
median_distances = [] median_distances = []
for f in infiles: for f in infiles:
dl.get(f) datalad_get(f)
data = np.genfromtxt(f, data = np.genfromtxt(f,
delimiter='\t', delimiter='\t',
usecols=(0, 1) # get only x and y column usecols=(0, 1) # get only x and y column
@ -632,7 +642,7 @@ def S2SRMS():
meanRMS = np.nanmean(median_distances) meanRMS = np.nanmean(median_distances)
# print results as Latex command using 'assoc' as sample identifier in name # print results as Latex command using 'assoc' as sample identifier in name
label_RMS = 'RMS{}'.format(assoc) label_RMS = 'RMS{}'.format(assoc)
print('\\newcommand{\\%s}{%s}' rsout('\\newcommand{\\%s}{%s}'
% (label_RMS, meanRMS)) % (label_RMS, meanRMS))
# save the results in variables # save the results in variables
if assoc == 'lab': if assoc == 'lab':
@ -646,9 +656,6 @@ def flowchart_figs():
Just for future reference: This is the subset of preprocessed and raw data Just for future reference: This is the subset of preprocessed and raw data
used for the flowchart of the algorithm. Not to be executed. used for the flowchart of the algorithm. Not to be executed.
""" """
import matplotlib.pyplot as plt
from scipy import signal
datapath = op.join('data', 'raw_eyegaze', 'sub-32', 'beh', datapath = op.join('data', 'raw_eyegaze', 'sub-32', 'beh',
'sub-32_task-movie_run-1_recording-eyegaze_physio.tsv.gz') 'sub-32_task-movie_run-1_recording-eyegaze_physio.tsv.gz')
data = np.recfromcsv(datapath, data = np.recfromcsv(datapath,
@ -690,7 +697,7 @@ def flowchart_figs():
def _butter_lowpass(cutoff, fs, order=5): def _butter_lowpass(cutoff, fs, order=5):
nyq = 0.5 * fs nyq = 0.5 * fs
normal_cutoff = cutoff / nyq normal_cutoff = cutoff / nyq
b, a = signal.butter( b, a = butter(
order, order,
normal_cutoff, normal_cutoff,
btype='low', btype='low',
@ -703,8 +710,8 @@ def flowchart_figs():
# let's get a data sample with no saccade # let's get a data sample with no saccade
win_data = p[16600:17000] win_data = p[16600:17000]
b, a = _butter_lowpass(lp_cutoff_freq, sr) b, a = _butter_lowpass(lp_cutoff_freq, sr)
win_data['x'] = signal.filtfilt(b, a, win_data['x'], method='gust') win_data['x'] = filtfilt(b, a, win_data['x'], method='gust')
win_data['y'] = signal.filtfilt(b, a, win_data['y'], method='gust') win_data['y'] = filtfilt(b, a, win_data['y'], method='gust')
filtered_vels = cal_velocities(data=win_data, sr=1000, px2deg=0.0266711972026) filtered_vels = cal_velocities(data=win_data, sr=1000, px2deg=0.0266711972026)
fig, ax1 = plt.subplots() fig, ax1 = plt.subplots()
@ -727,14 +734,14 @@ def cal_velocities(data, sr, px2deg):
return velocities return velocities
def plot_raw_vel_trace(): def mk_raw_vel_trace_figures():
""" """
Small helper function to plot raw velocity traces, as requested by reviewer 2 Small helper function to plot raw velocity traces, as requested by reviewer 2
in the second revision. in the second revision.
""" """
import matplotlib.pyplot as plt # use the same data as in mk_eyegaze_classification_figures()
# use the same data as in savegaze() (no need for file retrieval, should be there) # (no need for file retrieval, should be there)
dl.install(op.join('data', 'raw_eyegaze')) datalad_get(op.join('data', 'raw_eyegaze'), get_data=False)
infiles = [ infiles = [
op.join( op.join(
'data', 'data',
@ -751,7 +758,7 @@ def plot_raw_vel_trace():
# load data # load data
for i, f in enumerate(infiles): for i, f in enumerate(infiles):
# read data # read data
dl.get(f) datalad_get(f)
data = np.recfromcsv(f, data = np.recfromcsv(f,
delimiter='\t', delimiter='\t',
names=['x', 'y', 'pupil', 'frame']) names=['x', 'y', 'pupil', 'frame'])
@ -798,20 +805,17 @@ def plot_raw_vel_trace():
ax2.plot(time_idx, ax2.plot(time_idx,
velocities, velocities,
color=vel_color, lw=1) color=vel_color, lw=1)
pl.savefig( plt.savefig(
op.join('img', 'rawtrace_{}.svg'.format(ext)), op.join('img', 'rawtrace_{}.svg'.format(ext)),
transparent=True, transparent=True,
bbox_inches="tight") bbox_inches="tight")
pl.close() plt.close()
def savegaze(): def mk_eyegaze_classification_figures():
""" """
small function to generate and save remodnav classification figures small function to generate and save remodnav classification figures
""" """
from remodnav.tests import utils as ut
import pylab as pl
# use two examplary files (lab + MRI) used during testing as well # use two examplary files (lab + MRI) used during testing as well
# hardcoding those, as I see no reason for updating them # hardcoding those, as I see no reason for updating them
infiles = [ infiles = [
@ -827,7 +831,7 @@ def savegaze():
] ]
# one call per file due to https://github.com/datalad/datalad/issues/3356 # one call per file due to https://github.com/datalad/datalad/issues/3356
for f in infiles: for f in infiles:
dl.get(f) datalad_get(f)
for f in infiles: for f in infiles:
# read data # read data
data = np.recfromcsv(f, data = np.recfromcsv(f,
@ -846,11 +850,11 @@ def savegaze():
# for both data types (lab & mri) # for both data types (lab & mri)
events = clf(p[15000:25000]) events = clf(p[15000:25000])
# we remove plotting of details in favor of plotting raw gaze and # we remove plotting of details in favor of plotting raw gaze and
# velocity traces with plot_raw_vel_trace() as requested by reviewer 2 # velocity traces with mk_raw_vel_trace_figures() as requested by reviewer 2
# in the second round of revision # in the second round of revision
#events_detail = clf(p[24500:24750]) #events_detail = clf(p[24500:24750])
fig = pl.figure( fig = plt.figure(
# fake size to get the font size down in relation # fake size to get the font size down in relation
figsize=(14, 2), figsize=(14, 2),
dpi=120, dpi=120,
@ -861,14 +865,14 @@ def savegaze():
sampling_rate=1000.0, sampling_rate=1000.0,
adswa commented 2021-07-11 13:05:56 +00:00 (Migrated from github.com)
            vel_lim=(0.001, 1000))

While we are at it, we could set the lower bounds of the y-axis to not exactly zero. The data is logscaled, and this results in

/home/adina/repos/papers/paper-remodnav/remodnav/remodnav/tests/utils.py:124: UserWarning: Attempted to set non-positive bottom ylim on a log-scaled axis.
Invalid limit will be ignored.
  ax2.set_ylim(vel_lim)
```suggestion vel_lim=(0.001, 1000)) ``` While we are at it, we could set the lower bounds of the y-axis to not exactly zero. The data is logscaled, and this results in ``` /home/adina/repos/papers/paper-remodnav/remodnav/remodnav/tests/utils.py:124: UserWarning: Attempted to set non-positive bottom ylim on a log-scaled axis. Invalid limit will be ignored. ax2.set_ylim(vel_lim) ```
show_vels=True, show_vels=True,
coord_lim=(0, 1280), coord_lim=(0, 1280),
vel_lim=(0, 1000)) vel_lim=(0.001, 1000))
pl.savefig( plt.savefig(
op.join('img', 'remodnav_{}.svg'.format(ext)), op.join('img', 'remodnav_{}.svg'.format(ext)),
transparent=True, transparent=True,
bbox_inches="tight") bbox_inches="tight")
pl.close() plt.close()
# plot details # plot details
fig = pl.figure( fig = plt.figure(
# fake size to get the font size down in relation # fake size to get the font size down in relation
figsize=(7, 2), figsize=(7, 2),
dpi=120, dpi=120,
@ -880,21 +884,17 @@ def savegaze():
# show_vels=True, # show_vels=True,
# coord_lim=(0, 1280), # coord_lim=(0, 1280),
# vel_lim=(0, 1000)) # vel_lim=(0, 1000))
#pl.savefig( #plt.savefig(
# op.join('img', 'remodnav_{}_detail.svg'.format(ext)), # op.join('img', 'remodnav_{}_detail.svg'.format(ext)),
# transparent=True, # transparent=True,
# bbox_inches="tight") # bbox_inches="tight")
#pl.close() #plt.close()
def mainseq(s_mri, def mk_mainseq_figures(s_mri, s_lab):
s_lab):
""" """
plot main sequences from movie data for lab and mri subjects. plot main sequences from movie data for lab and mri subjects.
""" """
import pandas as pd
from matplotlib.lines import Line2D
datapath = op.join('data', datapath = op.join('data',
'studyforrest-data-eyemovementlabels', 'studyforrest-data-eyemovementlabels',
'sub*', 'sub*',
@ -904,8 +904,7 @@ def mainseq(s_mri,
# while the visible content hardly changes # while the visible content hardly changes
'*run-2*.tsv') '*run-2*.tsv')
data = sorted(glob(datapath)) data = sorted(glob(datapath))
from datalad.api import get datalad_get(path=data)
get(dataset='.', path=data)
# create dataframes for mri and lab subjects to plot in separate plots # create dataframes for mri and lab subjects to plot in separate plots
for (ids, select_sub, ext) in [ for (ids, select_sub, ext) in [
@ -934,7 +933,7 @@ def mainseq(s_mri,
SACCs = d[(d.label == 'SACC') | (d.label == 'ISAC')] SACCs = d[(d.label == 'SACC') | (d.label == 'ISAC')]
PSOs = d[(d.label == 'HPSO') | (d.label == 'IHPS') | (d.label == 'LPSO') | (d.label == 'ILPS')] PSOs = d[(d.label == 'HPSO') | (d.label == 'IHPS') | (d.label == 'LPSO') | (d.label == 'ILPS')]
fig = pl.figure( fig = plt.figure(
# fake size to get the font size down in relation # fake size to get the font size down in relation
figsize=(6, 4), figsize=(6, 4),
dpi=120, dpi=120,
@ -944,7 +943,7 @@ def mainseq(s_mri,
(SACCs, '.', 'red'), (SACCs, '.', 'red'),
(PSOs, '+', 'darkblue'), (PSOs, '+', 'darkblue'),
): ):
pl.loglog( plt.loglog(
ev['amp'], ev['amp'],
ev['peak_vel'], ev['peak_vel'],
sym, sym,
@ -972,12 +971,12 @@ def mainseq(s_mri,
markersize=10), markersize=10),
] ]
pl.ylim((10.0, 1000)) plt.ylim((10.0, 1000))
pl.xlim((0.01, 40.0)) plt.xlim((0.01, 40.0))
pl.legend(handles=custom_legend, loc=4) plt.legend(handles=custom_legend, loc=4)
pl.ylabel('peak velocities (deg/s)') plt.ylabel('peak velocities (deg/s)')
pl.xlabel('amplitude (deg)') plt.xlabel('amplitude (deg)')
pl.savefig( plt.savefig(
op.join( op.join(
'img', 'img',
'mainseq{}_{}.svg'.format( 'mainseq{}_{}.svg'.format(
@ -985,7 +984,7 @@ def mainseq(s_mri,
ext)), ext)),
transparent=True, transparent=True,
bbox_inches="tight") bbox_inches="tight")
pl.close() plt.close()
def RMSD(mn, def RMSD(mn,
@ -1058,7 +1057,6 @@ def get_remodnav_params(stim_type):
for ev in events: for ev in events:
ev['label'] = label_map[ev['label']] ev['label'] = label_map[ev['label']]
from collections import OrderedDict
durs = OrderedDict() durs = OrderedDict()
durs['event']=[] durs['event']=[]
durs['alg']=[] durs['alg']=[]
@ -1083,7 +1081,6 @@ def print_RMSD():
for use in main.tex. for use in main.tex.
""" """
# I don't want to overwrite the original dicts # I don't want to overwrite the original dicts
from copy import deepcopy
img = deepcopy(image_params) img = deepcopy(image_params)
dots = deepcopy(dots_params) dots = deepcopy(dots_params)
video = deepcopy(video_params) video = deepcopy(video_params)
@ -1107,7 +1104,7 @@ def print_RMSD():
label_prefix = '{}{}{}{}'.format(ev, stim, par, alg) label_prefix = '{}{}{}{}'.format(ev, stim, par, alg)
# take the value of the event and param type by indexing the dict with the position of # take the value of the event and param type by indexing the dict with the position of
# the current algorithm # the current algorithm
print('\\newcommand{\\%s}{%s}' rsout('\\newcommand{\\%s}{%s}'
%(label_prefix, dic[0][ev][par][dic[0][ev]['alg'].index(alg)])) %(label_prefix, dic[0][ev][par][dic[0][ev]['alg'].index(alg)]))
# compute RMSDs for every stimulus category # compute RMSDs for every stimulus category
for ev in event_types: for ev in event_types:
@ -1118,28 +1115,26 @@ def print_RMSD():
algo = dic[0][ev]['alg'] algo = dic[0][ev]['alg']
for i in range(len(rmsd)): for i in range(len(rmsd)):
label = 'rank{}{}{}'.format(ev, stim, algo[i]) label = 'rank{}{}{}'.format(ev, stim, algo[i])
print('\\newcommand{\\%s}{%s}' rsout('\\newcommand{\\%s}{%s}'
%(label, rmsd[i])) %(label, rmsd[i]))
def plot_dist(figures): def mk_event_duration_histograms(figures):
""" """
Plot the events duration distribution per movie run, per data set. Plot the events duration distribution per movie run, per data set.
""" """
import pandas as pd
# do nothing if we don't want to plot # do nothing if we don't want to plot
if not figures: if not figures:
return return
dl.install(op.join('data', 'studyforrest-data-eyemovementlabels')) datalad_get(op.join('data', 'studyforrest-data-eyemovementlabels'))
datapath = op.join('data', datapath = op.join('data',
'studyforrest-data-eyemovementlabels', 'studyforrest-data-eyemovementlabels',
'sub*', 'sub*',
'*.tsv') '*.tsv')
data = sorted(glob(datapath)) data = sorted(glob(datapath))
dl.get(dataset='.', path=data) datalad_get(path=data, get_data=False)
for ds, ds_name in [(mri_ids, 'mri'), (lab_ids, 'lab')]: for ds, ds_name in [(mri_ids, 'mri'), (lab_ids, 'lab')]:
dfs = [ dfs = [
@ -1161,16 +1156,16 @@ def plot_dist(figures):
(FIX, 'fixation', (0, 1.0), (1, 50000)), (FIX, 'fixation', (0, 1.0), (1, 50000)),
(PSOs, 'PSO', (0, 0.04), (1, 26000)), (PSOs, 'PSO', (0, 0.04), (1, 26000)),
(PURs, 'pursuit', (0, .8), (1, 30000))]: (PURs, 'pursuit', (0, .8), (1, 30000))]:
fig = pl.figure(figsize=(3,2)) fig = plt.figure(figsize=(3,2))
pl.hist(ev_df['duration'].values, plt.hist(ev_df['duration'].values,
bins='doane', bins='doane',
range=x_lim, range=x_lim,
color='gray') color='gray')
#log=True) #log=True)
pl.xlabel('{} duration in s'.format(label)) plt.xlabel('{} duration in s'.format(label))
pl.xlim(x_lim) plt.xlim(x_lim)
pl.ylim(y_lim) plt.ylim(y_lim)
pl.savefig( plt.savefig(
op.join( op.join(
'img', 'img',
'hist_{}_{}.svg'.format( 'hist_{}_{}.svg'.format(
@ -1178,7 +1173,7 @@ def plot_dist(figures):
ds_name)), ds_name)),
transparent=True, transparent=True,
bbox_inches="tight") bbox_inches="tight")
pl.close() plt.close()
def kappa(): def kappa():
@ -1189,7 +1184,6 @@ def kappa():
""" """
px2deg = None px2deg = None
sr = None sr = None
from sklearn.metrics import cohen_kappa_score
# for every stimulus type # for every stimulus type
for stim in ['img', 'dots', 'video']: for stim in ['img', 'dots', 'video']:
# for every eye movement label used in Anderson et al. (2017) # for every eye movement label used in Anderson et al. (2017)
@ -1229,11 +1223,11 @@ def kappa():
AL_res.append(labels) AL_res.append(labels)
if len(MN_res[idx]) != len(RA_res[idx]): if len(MN_res[idx]) != len(RA_res[idx]):
print( rsout(
"% #\n% # %INCONSISTENCY Found label length mismatch " "% #\n% # %INCONSISTENCY Found label length mismatch "
"between coders for: {}\n% #\n".format(fname)) "between coders for: {}\n% #\n".format(fname))
shorter = min([len(RA_res[idx]), len(MN_res[idx])]) shorter = min([len(RA_res[idx]), len(MN_res[idx])])
print('% Truncate labels to shorter sample: {}'.format( rsout('% Truncate labels to shorter sample: {}'.format(
shorter)) shorter))
# truncate the labels by indexing up to the highest index # truncate the labels by indexing up to the highest index
# in the shorter list of labels # in the shorter list of labels
@ -1246,7 +1240,7 @@ def kappa():
RA_res_flat = [item for sublist in RA_res for item in sublist] RA_res_flat = [item for sublist in RA_res for item in sublist]
MN_res_flat = [item for sublist in MN_res for item in sublist] MN_res_flat = [item for sublist in MN_res for item in sublist]
AL_res_flat = [item for sublist in AL_res for item in sublist] AL_res_flat = [item for sublist in AL_res for item in sublist]
#print(sum(RA_res_flat), sum(MN_res_flat)) #rsout(sum(RA_res_flat), sum(MN_res_flat))
assert len(RA_res_flat) == len(MN_res_flat) == len(AL_res_flat) assert len(RA_res_flat) == len(MN_res_flat) == len(AL_res_flat)
# compute Cohens Kappa # compute Cohens Kappa
for rating, comb in [('RAMN', [RA_res_flat, MN_res_flat]), for rating, comb in [('RAMN', [RA_res_flat, MN_res_flat]),
@ -1254,13 +1248,16 @@ def kappa():
('ALMN', [MN_res_flat, AL_res_flat])]: ('ALMN', [MN_res_flat, AL_res_flat])]:
kappa = cohen_kappa_score(comb[0], comb[1]) kappa = cohen_kappa_score(comb[0], comb[1])
label = 'kappa{}{}{}'.format(rating, stim, ev) label = 'kappa{}{}{}'.format(rating, stim, ev)
print('\\newcommand{\\%s}{%s}' % (label, '%.2f' % kappa)) rsout('\\newcommand{\\%s}{%s}' % (label, '%.2f' % kappa))
def rsout(str):
fname = environ.get('REMODNAV_RESULTS', None)
fobj = open(fname, 'a') if fname else sys.stdout
print(str, file=fobj)
if __name__ == '__main__': if __name__ == '__main__':
import argparse
parser = argparse.ArgumentParser() parser = argparse.ArgumentParser()
parser.add_argument( parser.add_argument(
'-f', '--figure', help='if given, figures will be produced.', '-f', '--figure', help='if given, figures will be produced.',
@ -1289,12 +1286,19 @@ if __name__ == '__main__':
args = parser.parse_args() args = parser.parse_args()
# generate & save figures; export the stats # generate & save figures; export the stats
if args.figure or args.stats: if args.figure or args.stats:
savefigs(args.figure, args.stats) print("#\n# Render confusion matrix figures")
mk_confusion_figures(args.figure, args.stats)
print("#\n# Compute RMSD table values")
print_RMSD() print_RMSD()
plot_dist(args.figure) print('#\n# Render event duration histograms')
mk_event_duration_histograms(args.figure)
print("#\n# Compute kappa score for inter-rater agreements")
kappa() kappa()
plot_raw_vel_trace() print('#\n# Render raw velocity traces')
mk_raw_vel_trace_figures()
if args.mainseq: if args.mainseq:
mainseq(args.submri, args.sublab) print('#\n# Render main sequence plots')
mk_mainseq_figures(args.submri, args.sublab)
if args.remodnav: if args.remodnav:
savegaze() print('#\n# Render eyegaze classification plots')
mk_eyegaze_classification_figures()

2
img/.gitignore vendored
View file

@ -3,6 +3,8 @@ confusion_MN_RA.*
confusion_RA_AL.* confusion_RA_AL.*
remodnav_lab.* remodnav_lab.*
remodnav_mri.* remodnav_mri.*
rawtrace_lab.*
rawtrace_mri.*
remodnav_lab.pdf remodnav_lab.pdf
remodnav_mri.pdf remodnav_mri.pdf
confusion_MN_RA.pdf confusion_MN_RA.pdf

View file

@ -1,21 +1,16 @@
all: pics all: pics
# Optionally some PICS could be ignored. By default XXX ones. # use `chronic` to make output look neater, if available
# PICS_IGNORE must contain a rule for grep CHRONIC=$(shell which chronic || echo '' )
PICS_IGNORE ?= " "
# For every .svg we must have a pdf # For every .svg we must have a pdf
PICS=$(shell find . -iname \*svg \ pics: $(shell find . -iname \*.svg | sed -e 's/svg/pdf/g' )
| sed -e 's/svg/pdf/g' -e 's/\([^\]\)\([ \t:]\)/\1\\\\\2/g' \
| grep -v -e $(PICS_IGNORE) )
pics: $(PICS)
echo $(PICS)
clean: clean:
for p in *.svg; do rm -f $${p%*.svg}.eps $${p%*.svg}.pdf; done for p in *.svg; do rm -f $${p%*.svg}.eps $${p%*.svg}.pdf; done
-rm -rf *_300dpi.png -rm -rf *_300dpi.png
# git-ignore each auto-rendered figure
.PHONY: ignore-% .PHONY: ignore-%
ignore-%: ignore-%:
@grep -q "^$*$$" .gitignore || { \ @grep -q "^$*$$" .gitignore || { \
@ -24,16 +19,9 @@ ignore-%:
# #
# Inkscape rendered figures # Inkscape rendered figures
# #
# try modern-age API first, fall-back on old one, if needed
%.pdf: %.svg ignore-%.pdf %.pdf: %.svg ignore-%.pdf
@echo "Rendering $@" @echo "Rendering $@"
@inkscape -z -f "$<" -A "$@" || inkscape --export-filename="$@" "$<" @$(CHRONIC) inkscape --export-filename="$@" "$<" || $(CHRONIC) inkscape -z -f "$<" -A "$@"
%.eps: %.svg ignore-%.eps
@echo "Rendering $@"
@inkscape -z -f "$<" -E "$@" || inkscape --export-filename="$@" "$<"
%_300dpi.png: %.svg ignore-%_300dpi.png
@echo "Rendering $@"
@inkscape -z -f "$<" -e "$@" -d 300 || inkscape -d 300 --export-filename="$@" "$<"
.PHONY: all pics .PHONY: all pics