Note
Go to the end to download the full example code or to run this example in your browser via Binder.
Exploring Braindecode’s BIDSDataset
Estimated reading time: 2 minutes
Tests showing that BIDSDataset is not able to handle the example EEGLAB dataset and is slower than pybids
from pathlib import Path
import os
# Environment configuration, set before the third-party imports that follow so
# the values are already in place when those packages are loaded.
# ``setdefault`` keeps any value the caller has already exported.
os.environ.setdefault("NUMBA_DISABLE_JIT", "1")
# NOTE(review): `_MNE_FAKE_HOME_DIR` is presumably read by MNE to locate its
# home/config directory - confirm against the MNE version in use.
os.environ.setdefault("_MNE_FAKE_HOME_DIR", str(Path.cwd()))
# Create the ".mne" folder inside that directory. ``parents=True`` makes this
# work even when a pre-set _MNE_FAKE_HOME_DIR points at a directory that does
# not exist yet (the CACHE_DIR creation in this file uses the same flags).
(Path(os.environ["_MNE_FAKE_HOME_DIR"]) / ".mne").mkdir(parents=True, exist_ok=True)
from bids import BIDSLayout
from braindecode.datasets import BIDSDataset
from eegdash import EEGDash, EEGDashDataset
# Dataset to inspect and the local cache folder; both can be overridden through
# the EEGDASH_DATASET_ID / EEGDASH_CACHE_DIR environment variables.
DATASET_ID = os.environ.get("EEGDASH_DATASET_ID", "ds002718")
CACHE_DIR = Path(os.environ.get("EEGDASH_CACHE_DIR", Path.cwd() / "eegdash_cache"))
CACHE_DIR = CACHE_DIR.resolve()
CACHE_DIR.mkdir(parents=True, exist_ok=True)
# Folder of this dataset inside the cache; used as the BIDS root further down.
root = CACHE_DIR / DATASET_ID

# Look up at most three records of the dataset and stop early if there are none.
eegdash = EEGDash()
records = eegdash.find({"dataset": DATASET_ID}, limit=3)
if not records:
    raise RuntimeError(f"No records found for dataset {DATASET_ID}.")

dataset = EEGDashDataset(cache_dir=CACHE_DIR, records=records)
# Touch the first recording's raw data; a RuntimeError here is reported but
# deliberately does not abort the example.
try:
    _ = dataset.datasets[0].raw
except RuntimeError as exc:
    print(f"Raw read failed (likely missing coordsystem.json): {exc}")

bids = BIDSDataset(root=str(root), preload=False)
# Can't import regular EEGLAB dataset
Tests showing pybids utilities as well as limitations
Recording files can be retrieved fast
File path can be mapped to BIDS file using simple additional parsing
Needed info such as duration and channel count can be retrieved easily
Not all file-level metadata files can be retrieved even though they exist
The top-level JSON associated with a file can’t be retrieved from the file level
def get_recordings(layout: "BIDSLayout") -> list:
    """Return the recording files of the first EEG format found in *layout*.

    The supported formats are probed in a fixed priority order and the search
    stops at the first format that yields at least one file, so the result
    never mixes formats.

    Parameters
    ----------
    layout : BIDSLayout
        Layout to query. Only ``layout.get(extension=..., return_type="filename")``
        is called on it.

    Returns
    -------
    list
        Whatever ``layout.get`` returned for the first extension with a
        non-empty result, or an empty list when no extension matched.
    """
    # Primary data-file extension per format, in lookup priority order.
    # Companion files are listed for reference only; they are not queried.
    primary_extensions = (
        ".set",  # EEGLAB (companion: .fdt)
        ".edf",  # European Data Format
        ".vhdr",  # BrainVision (companions: .eeg, .vmrk, .dat, .raw)
        ".bdf",  # BioSemi
    )
    for ext in primary_extensions:
        files = layout.get(extension=ext, return_type="filename")
        if files:
            return files
    return []
# Build the layout a single time and reuse it for every query below, instead of
# constructing a second BIDSLayout for the same root.
layout = BIDSLayout(str(root))
recordings = get_recordings(layout)
print(recordings)
# get file from path
if not recordings:
    raise RuntimeError(f"No EEG recordings found under {root}.")
example_file = recordings[0]
# Parse the BIDS entities out of the file path, then query the layout with
# those entities to obtain the corresponding file object.
entities = layout.parse_file_entities(example_file)
matches = layout.get(**entities)
if not matches:
    # Fail with a descriptive error rather than an IndexError on ``[0]``.
    raise RuntimeError(f"No BIDS file matches the entities of {example_file}.")
bidsfile = matches[0]
print(bidsfile)
import pprint
# General information about one recording: collect its entities with
# ``metadata="all"`` and pretty-print the resulting mapping.
_entities_with_metadata = bidsfile.get_entities(metadata="all")
pprint.pprint(_entities_with_metadata)
`get_associations()` doesn’t give us all the desired BIDS dependencies
# Bare expression: the return value is not stored or printed, so it is only
# displayed when run as the last statement of an interactive/notebook cell.
bidsfile.get_associations()
The top-level events.json can’t be retrieved from the file level
# Start from the recording's own entities and re-target them at sidecar files.
file_entities = bidsfile.get_entities()
# remove 'datatype'
del file_entities["datatype"]

# Query for events.json with the recording's entities, then compare with a
# layout-wide query that filters on suffix and extension only.
file_entities.update(suffix="events", extension=".json")
print(file_entities)
print(layout.get(**file_entities))
print(layout.get(suffix="events", extension=".json"))

# not all file level metadata files can be retrieved even though they exist
for _suffix, _extension in (
    ("events", "tsv"),
    ("electrodes", "tsv"),
    ("coordsystem", "json"),
):
    file_entities["suffix"] = _suffix
    file_entities["extension"] = _extension
    print(file_entities)
    print(layout.get(**file_entities))