Source code for adnipy.adnipy

# -*- coding: utf-8 -*-

"""Process ADNI study data with adnipy."""

# Standard library imports
import warnings

# Third party imports
import pandas as pd

def read_csv(file):
    """Load a .csv file into a pandas.DataFrame.

    The missing-value codes used in the ADNI database are read as NaN.

    Parameters
    ----------
    file : str, pathlib.Path
        The path to the .csv file.

    Returns
    -------
    pd.DataFrame
        The content of the file as a dataframe.

    See Also
    --------
    standard_column_names
    standard_dates
    standard_index

    """
    # Codes that stand for empty values.
    missing_codes = ["-1", "-4"]

    # Reading these columns as plain objects prevents UserWarnings on large
    # files like ADNIMERGE.
    object_columns = ("ABETA", "TAU", "TAU_bl", "PTAU", "PTAU_bl")
    column_types = dict.fromkeys(object_columns, object)

    return pd.read_csv(file, dtype=column_types, na_values=missing_codes)
def timedelta(old, new):
    """Compute the time difference between two timepoints per subject.

    Both inputs are re-indexed by 'Subject ID' and their 'SCANDATE' columns
    are subtracted as ``old - new``; pandas aligns the rows on the subject.

    Parameters
    ----------
    old : pd.DataFrame
        This is the older dataframe.
    new : pd.DataFrame
        This is the newer dataframe.

    Returns
    -------
    pd.Series
        The content will be timedelta values.
        Look into numpy for more options.

    """
    older_dates = old.reset_index().set_index("Subject ID")["SCANDATE"]
    newer_dates = new.reset_index().set_index("Subject ID")["SCANDATE"]

    return older_dates - newer_dates
def get_matching_images(left, right):
    """Match different scan types based on closest date.

    Each row of ``left`` is paired with the row of ``right`` that has the
    same subject and the closest scan date. The columns 'Subject ID' and
    'SCANDATE' are required in both inputs.

    Parameters
    ----------
    left : pd.DataFrame
        Dataframe containing the tau scans.
    right : pd.DataFrame
        Dataframe containing the mri scans.

    Returns
    -------
    pd.DataFrame
        The rows of ``left`` that have a match, indexed by
        ('Subject ID', 'SCANDATE'). The column 'Image ID' is renamed to
        'Image ID_l' and the matched value from ``right`` is stored in
        'Image ID_r'. The frame has zero rows if nothing could be matched.

    Warns
    -----
    UserWarning
        If the subject of at least one row of ``left`` is absent from
        ``right``.

    """
    left = left.set_index(["Subject ID", "SCANDATE"]).sort_index()
    right = right.set_index(["Subject ID", "SCANDATE"]).sort_index()

    right_subjects = right.index.get_level_values(0)

    missing_match = []
    matching_images = []

    for index in left.index:
        subject_id, scan_date = index

        if subject_id not in right_subjects:
            missing_match.append(index)
            continue

        # Pick the scan date of this subject in `right` that is nearest to
        # the date of the current `left` row.
        candidate_dates = right.loc[subject_id].index.unique()
        date = min(
            candidate_dates,
            key=lambda candidate, target=scan_date: abs(candidate - target),
        )
        matching_image = right.loc[subject_id, date]

        # The first remaining column of `right` is taken as the image
        # identifier of the match.
        image = left.loc[[index]].assign(
            **{"Image ID_r": matching_image.values[0]}
        )
        matching_images.append(image)

    if matching_images:
        matching_images_df = pd.concat(matching_images)
    else:
        # pd.concat raises ValueError on an empty list, so build an empty
        # result with the same layout instead of crashing.
        matching_images_df = left.iloc[:0].copy()
        matching_images_df["Image ID_r"] = []

    matching_images_df = matching_images_df.rename(
        columns={"Image ID": "Image ID_l"}
    )

    if missing_match:
        missing_match_str = str(set(missing_match))
        message = "Could not find matching images for:" + missing_match_str
        # stacklevel=2 attributes the warning to the caller of this function.
        warnings.warn(message, stacklevel=2)

    return matching_images_df