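# Demo command script: a flat sequence of `say <cue>` and `run '<command>'`
# steps. Neither helper is defined in this file -- presumably they come from
# the presentation tooling that sources/drives this script (TODO confirm).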
# Fetch the resting-state BOLD JSON sidecar for one subject, then for every
# subject matching sub-NDARINV00*, first via a glob and then in batches of 20.
# NOTE(review): these steps use abcd-bids paths but appear before any
# `datalad clone`/`cd` in this script; the same sequence (for the .nii.gz
# files) is repeated at the end of the script after the clone. Confirm that
# this leading copy is intentional.
run 'datalad get sub-NDARINV00BD7VDC/ses-baselineYear1Arm1/func/sub-NDARINV00BD7VDC_ses-baselineYear1Arm1_task-rest_run-01_bold.json'
run 'datalad get sub-NDARINV00*/ses-baselineYear1Arm1/func/sub-NDARINV00*_ses-baselineYear1Arm1_task-rest_run-01_bold.json'
# Build the list of wanted files; the expansion is quoted so each path is
# emitted verbatim, and `>|` overwrites a list left over from an earlier run
# even when the shell has noclobber set.
run 'for sub in sub-NDARINV00*; do echo "${sub}/ses-baselineYear1Arm1/func/${sub}_ses-baselineYear1Arm1_task-rest_run-01_bold.json"; done >| toget.txt'
# Hand the list to `datalad get` 20 paths per invocation.
run 'xargs -n 20 datalad get < toget.txt'
# Show the installed DataLad version.
run 'datalad --version'
# Configure the Git identity used for the commits made during the demo.
# Plain assignment (no --add) replaces the value instead of appending another
# one, so re-running the demo does not pile up duplicate user.name/user.email
# entries in the global config.
run 'git config --global user.name "Adina Wagner"
git config --global user.email "adina.wagner@t-online.de"'

say "DataLad datasets"
# Create a new dataset named `myanalysis` with the `yoda` configuration.
run 'datalad create -c yoda myanalysis'
# Enter the new dataset and show its directory layout.
run 'cd myanalysis
tree'
run 'ls -a # show also hidden files'

say "Now edit the README"
# Blank out both README files. `>|` forces the overwrite even if the shell
# has noclobber enabled.
run '# we delete placeholders in README
echo " " >| README.md'
run 'echo " " >| code/README.md'
# Inspect the modification, then record it.
run 'datalad status'
run 'git diff'
run 'datalad save -m "Replace placeholder in README"'
# Add a new (untracked) image and give the README real content.
run 'wget https://imgs.xkcd.com/comics/compiling.png'
# README.md already exists at this point, so use `>|` like the writes above;
# a plain `>` is refused when noclobber is set.
run 'echo "#My first data analysis with DataLad" >| README.md'

run 'datalad status'
# Save the two changes as separate commits: first only README.md (explicit
# path), then everything that is still unsaved.
run 'datalad save -m "Add project information to README" README.md'
run 'datalad save -m "Add a motivational webcomic"'

say 'RESEARCH LOG'
# Show the commit history that touched README.md.
run 'git log README.md'

say "I WILL HIDE AN EASTEREGG NOW"
# Download an image via DataLad into the hidden file .easteregg.png, using
# the given commit message. The backslashes continue one single command.
run 'datalad download-url -m "add motivational comic to my dataset" \
  -O .easteregg.png \
  https://imgs.xkcd.com/comics/fuck_grapefruit.png'

say 'CONSUME DATASETS'
# Leave `myanalysis` and clone a public dataset next to it.
run 'cd ../
datalad clone git@github.com:psychoinformatics-de/studyforrest-data-phase2.git'
run 'cd studyforrest-data-phase2'
# Look at the content and at the disk space the clone occupies.
run 'ls'
run 'du -sh'
# Retrieve all runs of one task for sub-01, then drop a single run again.
run 'datalad get sub-01/ses-localizer/func/sub-01_ses-localizer_task-objectcategories_run-*.nii.gz'
run 'datalad drop sub-01/ses-localizer/func/sub-01_ses-localizer_task-objectcategories_run-4_bold.nii.gz'

say 'DROP DATA'
# Back to the analysis dataset created earlier.
run 'cd ../myanalysis'
run 'datalad drop .easteregg.png'
# NOTE(review): compiling.png was added with plain wget, whereas
# .easteregg.png came from `datalad download-url`; presumably this drop is
# meant to contrast the two cases -- confirm the intended outcome.
run 'datalad drop compiling.png'
# Try to open the dropped file, fetch it again, and open it once more.
run 'xdg-open .easteregg.png # its gone :(!'
run 'datalad get .easteregg.png'
run 'xdg-open .easteregg.png'

say "NOW SUBDATASETS"
# Clone the iris dataset into input/ and register it in the current dataset
# (-d .).
run 'datalad clone -d . git@github.com:datalad-handbook/iris_data.git input/'
# Show the most recent commit of the current dataset.
run 'git show'
# Look at the history inside input/, then return to the parent directory.
run 'cd input && git log'
run 'cd ..'
run 'datalad subdatasets'
# Write the analysis script to code/script.py through a here-document.
# The delimiter is quoted ("EOT") so the shell copies the body literally and
# never performs parameter or command expansion on the Python source.
# Inside this single-quoted `run` argument, each '"'"' sequence yields one
# literal single quote in the generated Python file.
run 'cat << "EOT" > code/script.py

import pandas as pd
import seaborn as sns
import datalad.api as dl
from sklearn import model_selection
from sklearn.neighbors import KNeighborsClassifier
from sklearn.metrics import classification_report

data = "input/iris.csv"

# make sure that the data are obtained (get will also install linked sub-ds!):
dl.get(data)

# prepare the data as a pandas dataframe
df = pd.read_csv(data)
attributes = ["sepal_length", "sepal_width", "petal_length","petal_width", "class"]
df.columns = attributes

# create a pairplot to plot pairwise relationships in the dataset
plot = sns.pairplot(df, hue='"'"'class'"'"', palette='"'"'muted'"'"')
plot.savefig('"'"'pairwise_relationships.png'"'"')

# perform a K-nearest-neighbours classification with scikit-learn
# Step 1: split data in test and training dataset (20:80)
array = df.values
X = array[:,0:4]
Y = array[:,4]
test_size = 0.20
seed = 7
X_train, X_test, Y_train, Y_test = model_selection.train_test_split(X, Y,
                                                                    test_size=test_size,
                                                                    random_state=seed)
# Step 2: Fit the model and make predictions on the test dataset
knn = KNeighborsClassifier()
knn.fit(X_train, Y_train)
predictions = knn.predict(X_test)

# Step 3: Save the classification report
report = classification_report(Y_test, predictions, output_dict=True)
df_report = pd.DataFrame(report).transpose().to_csv('"'"'prediction_report.csv'"'"')

EOT'
# Save the new script (only code/script.py) and tag that state.
run 'datalad status'
run 'datalad save -m "add script for kNN classification and plotting" --version-tag ready4analysis code/script.py'
run 'datalad status'
# Execute the script through `datalad run`, declaring the input file and the
# two files the script writes. The backslashes continue one single command.
run 'datalad run -m "analyze iris data with classification analysis" \
  --input "input/iris.csv" \
  --output "prediction_report.csv" \
  --output "pairwise_relationships.png" \
  "python3 code/script.py"'
# Show the newest commit.
run 'git log -n 1'

# Clone a published copy of the analysis into analysis_clone and enter it.
run 'cd ../
datalad clone git@github.com:adswa/myanalysis.git analysis_clone'
run 'cd analysis_clone'
# Show the history of the plot file.
run 'git log pairwise_relationships.png'
# NOTE(review): 71cb8c5 is a hard-coded commit hash; presumably it identifies
# the `datalad run` commit in the published adswa/myanalysis repository --
# verify it still exists there before presenting.
run 'datalad rerun 71cb8c5'

say 'COMPUTATIONAL REPRODUCIBILITY'
run 'cd ../myanalysis'
# Register a container image under the name `software`.
run 'datalad containers-add software --url shub://adswa/resources:2'
# Run the same analysis command as before, this time inside that container.
# The backslashes continue one single command.
run 'datalad containers-run -m "rerun analysis in container" \
  --container-name software \
  --input "input/iris.csv" \
  --output "prediction_report.csv" \
  --output "pairwise_relationships.png" \
  "python3 code/script.py"'

# Swap the plot palette in place (muted -> Blues); '"'"' embeds a literal
# single quote inside the single-quoted `run` argument.
run 'sed -i '"'"'s/muted/Blues/g'"'"' code/script.py'
run 'git diff'
run 'datalad save -m "Update color scheme"'
# HEAD is now the "Update color scheme" commit, so HEAD~1 is the
# containers-run commit recorded just before it.
run 'datalad rerun HEAD~1'
run 'git diff HEAD~1'

say 'ABCD DATASET'
# Clone the abcd-bids dataset next to the other datasets and enter it.
run 'cd ../'
run 'datalad clone smaug.datalad.org:/mnt/datasets/datalad/crawl-misc/abcd/abcd-bids'
run 'cd abcd-bids'
# List the content and report the disk usage of the fresh clone.
run 'ls
du -sh'
run 'datalad subdatasets'
# `get -n` on one subject directory, then fetch that subject's resting-state
# BOLD image.
run 'datalad get -n sub-NDARINV00BD7VDC'
run 'datalad get sub-NDARINV00BD7VDC/ses-baselineYear1Arm1/func/sub-NDARINV00BD7VDC_ses-baselineYear1Arm1_task-rest_run-01_bold.nii.gz'
# The same file for every subject matching sub-NDARINV00*, via a glob.
run 'datalad get sub-NDARINV00*/ses-baselineYear1Arm1/func/sub-NDARINV00*_ses-baselineYear1Arm1_task-rest_run-01_bold.nii.gz'
# Alternative: write the wanted paths to a list and fetch them 20 at a time.
# The expansion is quoted so each path is emitted verbatim, and `>|`
# overwrites a stale list even when the shell has noclobber set.
run 'for sub in sub-NDARINV00*; do echo "${sub}/ses-baselineYear1Arm1/func/${sub}_ses-baselineYear1Arm1_task-rest_run-01_bold.nii.gz"; done >| toget.txt'
run 'xargs -n 20 datalad get < toget.txt'
say 'done'
run 'echo done'