import os
import numpy as np
import anndata as ad
import pandas as pd
import scanpy as sc
import copy
import warnings
from datetime import datetime
# Session-wide setup: scanpy job count, warning suppression, and the working
# directory from which every input/output path in this script is built.
sc.settings.n_jobs = 32
warnings.filterwarnings("ignore")  # NOTE: suppresses every warning for the rest of the run
pwd = os.getcwd()

# --- all-nuclei object (2022-08-18): rename Supertype categories and attach CPS ---
region = "MTG"
date = "2022-08-18"
adata = sc.read_h5ad(
    os.path.join(pwd, "output", region, "SEAAD_MTG_RNAseq_all-nuclei." + date + ".h5ad")
)

# `rename_categories(..., inplace=True)` was deprecated in pandas 1.3 and the
# keyword was removed in pandas 2.0 (TypeError). Assigning the renamed column
# back gives the same result on every pandas version. Categories not listed in
# the mapping are left unchanged.
adata.obs["Supertype"] = adata.obs["Supertype"].cat.rename_categories(
    {
        "VLMC_2": "Pericyte_1",
        "VLMC_2_1-SEAAD": "SMC-SEAAD",
        "VLMC_2_2-SEAAD": "Pericyte_2-SEAAD",
        "Micro-PVM_2_2-SEAAD": "Lymphocyte",
        "Micro-PVM_1_1-SEAAD": "Monocyte",
    }
)

# Left-join the CPS table (indexed by its first CSV column) onto obs via "Donor ID".
# NOTE(review): assumes the CSV index holds values matching obs["Donor ID"] — confirm.
CPS = pd.read_csv(os.path.join(pwd, "input", region, "donor_name_CPS.csv"), index_col=0)
adata.obs = adata.obs.merge(CPS, left_on="Donor ID", right_index=True, how="left")

# The output file name carries today's date (YYYY-MM-DD) instead of the input date.
adata.write(
    os.path.join(
        pwd,
        "output",
        region,
        "SEAAD_MTG_RNAseq_all-nuclei." + str(datetime.now().date()) + ".h5ad",
    ),
    compression="gzip",
)
# --- final-nuclei object (2022-08-18): rename Supertype categories and attach CPS ---
region = "MTG"
date = "2022-08-18"
adata = sc.read_h5ad(
    os.path.join(pwd, "output", region, "SEAAD_MTG_RNAseq_final-nuclei." + date + ".h5ad")
)

# `rename_categories(..., inplace=True)` was deprecated in pandas 1.3 and the
# keyword was removed in pandas 2.0 (TypeError). Assigning the renamed column
# back gives the same result on every pandas version. Categories not listed in
# the mapping are left unchanged.
adata.obs["Supertype"] = adata.obs["Supertype"].cat.rename_categories(
    {
        "VLMC_2": "Pericyte_1",
        "VLMC_2_1-SEAAD": "SMC-SEAAD",
        "VLMC_2_2-SEAAD": "Pericyte_2-SEAAD",
        "Micro-PVM_2_2-SEAAD": "Lymphocyte",
        "Micro-PVM_1_1-SEAAD": "Monocyte",
    }
)

# Left-join the CPS table (indexed by its first CSV column) onto obs via "Donor ID".
# NOTE(review): assumes the CSV index holds values matching obs["Donor ID"] — confirm.
CPS = pd.read_csv(os.path.join(pwd, "input", region, "donor_name_CPS.csv"), index_col=0)
adata.obs = adata.obs.merge(CPS, left_on="Donor ID", right_index=True, how="left")

# The output file name carries today's date (YYYY-MM-DD) instead of the input date.
adata.write(
    os.path.join(
        pwd,
        "output",
        region,
        "SEAAD_MTG_RNAseq_final-nuclei." + str(datetime.now().date()) + ".h5ad",
    ),
    compression="gzip",
)
# --- all-nuclei object (2023-05-05): refresh donor-level metadata columns ---
region = "MTG"
date = "2023-05-05"
adata = sc.read_h5ad(
    os.path.join(pwd, "output", region, f"SEAAD_MTG_RNAseq_all-nuclei.{date}.h5ad")
)

# Cohort metadata sheet from the supplementary workbook, indexed by its first column.
metadata = pd.read_excel(
    os.path.join(pwd, "input", region, "Supplementary Table 1.xlsx"),
    sheet_name="SEA-AD_Cohort_Metadata",
    index_col=0,
)

# Left-join the three donor columns onto each observation's "Donor ID".
# Missing values in all three columns are replaced with the string "N" first.
updated_metadata = adata.obs[["Donor ID"]].merge(
    metadata[["Age at Death", "APOE Genotype", "Severely Affected Donor"]].fillna("N"),
    how="left",
    left_on="Donor ID",
    right_index=True,
)

adata.obs["Age at Death"] = updated_metadata["Age at Death"].copy()

# The genotype values are written under "APOE4 Status" and that column is then
# renamed to "APOE Genotype".
adata.obs["APOE4 Status"] = updated_metadata["APOE Genotype"].copy()
adata.obs = adata.obs.rename(columns={"APOE4 Status": "APOE Genotype"})

adata.obs["Severely Affected Donor"] = updated_metadata["Severely Affected Donor"].copy()

# The output file name carries today's date (YYYY-MM-DD) instead of the input date.
adata.write(
    os.path.join(
        pwd,
        "output",
        region,
        f"SEAAD_MTG_RNAseq_all-nuclei.{datetime.now().date()}.h5ad",
    ),
    compression="gzip",
)
# --- final-nuclei object (2023-05-05): refresh donor-level metadata columns ---
region = "MTG"
date = "2023-05-05"
adata = sc.read_h5ad(
    os.path.join(pwd, "output", region, f"SEAAD_MTG_RNAseq_final-nuclei.{date}.h5ad")
)

# Cohort metadata sheet from the supplementary workbook, indexed by its first column.
metadata = pd.read_excel(
    os.path.join(pwd, "input", region, "Supplementary Table 1.xlsx"),
    sheet_name="SEA-AD_Cohort_Metadata",
    index_col=0,
)

# Left-join the three donor columns onto each observation's "Donor ID".
# Missing values in all three columns are replaced with the string "N" first.
updated_metadata = adata.obs[["Donor ID"]].merge(
    metadata[["Age at Death", "APOE Genotype", "Severely Affected Donor"]].fillna("N"),
    how="left",
    left_on="Donor ID",
    right_index=True,
)

adata.obs["Age at Death"] = updated_metadata["Age at Death"].copy()

# The genotype values are written under "APOE4 Status" and that column is then
# renamed to "APOE Genotype".
adata.obs["APOE4 Status"] = updated_metadata["APOE Genotype"].copy()
adata.obs = adata.obs.rename(columns={"APOE4 Status": "APOE Genotype"})

adata.obs["Severely Affected Donor"] = updated_metadata["Severely Affected Donor"].copy()

# The output file name carries today's date (YYYY-MM-DD) instead of the input date.
adata.write(
    os.path.join(
        pwd,
        "output",
        region,
        f"SEAAD_MTG_RNAseq_final-nuclei.{datetime.now().date()}.h5ad",
    ),
    compression="gzip",
)
# Write one gzip-compressed .h5ad per "Donor ID" category of the object currently in `adata`.
donor_dir = os.path.join(pwd, "output", region, "donor_objects")
# Make sure the target directory exists before the first write.
os.makedirs(donor_dir, exist_ok=True)

# Compute the date stamp once so every donor file gets the same date, even if
# the export runs past midnight.
date_stamp = str(datetime.now().date())

donor_ids = adata.obs["Donor ID"]
for donor_id in donor_ids.cat.categories:
    print(donor_id)
    # Boolean mask selects the observations belonging to this donor.
    adata[donor_ids == donor_id].write(
        os.path.join(
            donor_dir,
            donor_id + "_SEAAD_MTG_RNAseq_final-nuclei." + date_stamp + ".h5ad",
        ),
        compression="gzip",
    )