Source code for adnipy.adnipy

"""Process ADNI study data with adnipy."""

# Standard library imports
import warnings
from io import StringIO

# Third party imports
import pandas as pd



[docs]
def read_csv(file: str | StringIO) -> pd.DataFrame:
    """Return a csv file as a pandas.DataFrame.

    Recognizes missing values used in the ADNI database.

    Parameters
    ----------
    file : str, pathlib.Path
        The path to the .csv file.

    Returns
    -------
    pd.DataFrame
        Returns the file as a dataframe.

    See Also
    --------
    standard_column_names
    standard_dates
    standard_index

    """
    # empty values
    na_values = ["-1", "-4"]

    return pd.read_csv(file, na_values=na_values)




[docs]
def timedelta(old: pd.DataFrame, new: pd.DataFrame) -> pd.Series:
    """Compute the difference in scan dates between two timepoints.

    Both inputs are re-indexed by 'Subject ID' and the 'SCANDATE' column of
    `new` is subtracted from the 'SCANDATE' column of `old`, so rows are
    paired per subject.

    Parameters
    ----------
    old : pd.DataFrame
        This is the older dataframe. Needs 'Subject ID' and 'SCANDATE',
        either as columns or as part of the index.
    new : pd.DataFrame
        This is the newer dataframe, with the same requirements.

    Returns
    -------
    pd.Series
        ``old["SCANDATE"] - new["SCANDATE"]`` indexed by 'Subject ID'.
        The content will be timedelta values. Look into numpy for more
        options.

    """
    # Move any existing index back into columns, then key rows by subject.
    old_by_subject = old.reset_index().set_index("Subject ID")
    new_by_subject = new.reset_index().set_index("Subject ID")

    old_dates = old_by_subject["SCANDATE"]
    new_dates = new_by_subject["SCANDATE"]

    return old_dates - new_dates




[docs]
def get_matching_images(left: pd.DataFrame, right: pd.DataFrame) -> pd.DataFrame:
    """Match different scan types based on closest date.

    The columns 'Subject ID' and 'SCANDATE' are required in both inputs.
    For every row of `left`, the row of `right` with the same subject and
    the nearest scan date is looked up. Its first remaining column is
    stored in the new column 'Image ID_r' (presumably 'Image ID' is that
    first column -- verify against the caller's data). The 'Image ID'
    column of `left` is renamed to 'Image ID_l'.

    Parameters
    ----------
    left : pd.DataFrame
        Dataframe containing the tau scans.
    right : pd.DataFrame
        Dataframe containing the mri scans.

    Returns
    -------
    pd.DataFrame
        For each timepoint there is a match from both inputs. The result is
        indexed by ('Subject ID', 'SCANDATE') of `left`. If no row of `left`
        could be matched, an empty dataframe with the same columns is
        returned.

    Warns
    -----
    UserWarning
        If at least one subject of `left` does not occur in `right`.

    """
    index_columns = ["Subject ID", "SCANDATE"]
    left = left.set_index(index_columns).sort_index()
    right = right.set_index(index_columns).sort_index()

    missing_match = []
    matching_images = []
    right_subjects = right.index.get_level_values(0)

    def closest_date(dates: pd.Index, target: pd.Timestamp) -> pd.Timestamp:
        """Return the entry of `dates` nearest to `target`."""
        # `min` keeps the first candidate on ties; `right` is sorted, so
        # that is the earlier date.
        return min(dates, key=lambda date: abs(date - target))

    for index in left.index:
        subject_id, scan_date = index
        if subject_id not in right_subjects:
            missing_match.append(index)
            continue

        candidate_dates = right.loc[subject_id].index.unique()
        date = closest_date(candidate_dates, scan_date)
        matching_image = right.loc[subject_id, date]
        # The list indexer keeps the result a one-row dataframe.
        image = left.loc[[index]]
        image["Image ID_r"] = matching_image.to_numpy()[0]
        matching_images.append(image)

    if matching_images:
        matching_images_df = pd.concat(matching_images)
    else:
        # pd.concat raises ValueError on an empty list; return an empty
        # frame with the expected columns instead.
        matching_images_df = left.iloc[:0].assign(**{"Image ID_r": None})
    matching_images_df = matching_images_df.rename(columns={"Image ID": "Image ID_l"})

    if missing_match:
        missing_match_str = str(set(missing_match))
        message = "Could not find matching images for:" + missing_match_str
        # stacklevel=2 attributes the warning to the caller of this function.
        warnings.warn(message, stacklevel=2)

    return matching_images_df