def read_icd_mapping(map_path: str) -> pd.DataFrame:
    """Read the ICD9 -> ICD10 mapping table from a tab-separated file.

    Args:
        map_path: Path to the tab-delimited mapping file. The first row is
            used as the header and a ``diagnosis_description`` column must
            be present.

    Returns:
        The mapping table with ``diagnosis_description`` lower-cased.
        Missing descriptions stay missing (NaN).
    """
    mapping = pd.read_csv(map_path, header=0, delimiter="\t")
    # The vectorised ``.str`` accessor propagates missing descriptions as NaN,
    # whereas ``apply(str.lower)`` raises TypeError on the first NaN cell.
    mapping["diagnosis_description"] = mapping["diagnosis_description"].str.lower()
    return mapping
def get_diagnosis_icd(module_path: str) -> pd.DataFrame:
    """Load the gzip-compressed ``hosp/diagnoses_icd.csv.gz`` table.

    Args:
        module_path: Directory that contains the ``hosp`` sub-directory.

    Returns:
        The CSV contents as a DataFrame, with the first row used as header.
    """
    csv_path = f"{module_path}/hosp/diagnoses_icd.csv.gz"
    return pd.read_csv(csv_path, compression="gzip", header=0)
# TODO: Investigate whether `standardize_icd` should return the updated `diag` DataFrame; it currently modifies `diag` in place and returns None.
def standardize_icd(
    mapping: pd.DataFrame,
    diag: pd.DataFrame,
    map_code_col="diagnosis_code",
    root=True,
) -> None:
    """Add ICD10-standardised code columns to ``diag`` in place.

    Two columns are written to ``diag``:

    * ``root_icd10_convert`` -- for rows whose ``icd_version`` equals 9, the
      ``icd10cm`` value of the first ``mapping`` row whose ``map_code_col``
      equals the lookup key, or NaN when nothing matches. All other rows
      keep their original ``icd_code``.
    * ``root`` -- the first three characters of ``root_icd10_convert``, or
      NaN where that value is not a string.

    Args:
        mapping: ICD9 -> ICD10 table holding ``map_code_col`` and ``icd10cm``.
        diag: Diagnosis table with ``icd_code`` and ``icd_version`` columns;
            modified in place.
        map_code_col: Column of ``mapping`` holding the ICD9 codes. When the
            column is absent every ICD9 row is converted to NaN.
        root: When true, the lookup key is the first three characters of the
            ICD9 code instead of the full code.

    Returns:
        None. The result is the mutation of ``diag``.
    """
    col_name = "root_icd10_convert"
    # Start from the original codes so that non-ICD9 rows pass through as-is.
    diag[col_name] = diag["icd_code"].values

    # A positional boolean mask keeps the write independent of index labels.
    is_icd9 = (diag["icd_version"] == 9).to_numpy()
    if is_icd9.any():
        if map_code_col in mapping.columns:
            # Build the first-match lookup once instead of rescanning
            # ``mapping`` for every distinct code.
            known = mapping.dropna(subset=[map_code_col]).drop_duplicates(
                subset=map_code_col
            )
            lookup = dict(zip(known[map_code_col], known["icd10cm"]))

            icd9_codes = diag.loc[is_icd9, "icd_code"]
            keys = icd9_codes.str[:3] if root else icd9_codes
            # Keys without an entry in ``lookup`` come back as NaN.
            converted = keys.map(lookup).to_numpy()
        else:
            converted = np.nan
        diag.loc[is_icd9, col_name] = converted

    # Column for just the roots of the converted ICD10 column.
    diag["root"] = diag[col_name].apply(
        lambda code: code[:3] if isinstance(code, str) else np.nan
    )
def preproc_icd_module(
    module_path: str, ICD10_code: str, icd_map_path: str
) -> pd.DataFrame:
    """Collect the admissions that carry a given ICD10 root code.

    The diagnosis table under ``module_path`` is loaded, its ICD9 codes are
    converted through the mapping table at ``icd_map_path`` (root-level
    conversion), and rows left without a ``root`` value are discarded.

    Args:
        module_path: Directory passed on to ``get_diagnosis_icd``.
        ICD10_code: Pattern handed to ``Series.str.contains`` and tested
            against the ``root`` column.
        icd_map_path: Path passed on to ``read_icd_mapping``.

    Returns:
        A single-column DataFrame (``hadm_id``) holding the unique admission
        ids with at least one matching diagnosis row.
    """
    icd_map = read_icd_mapping(icd_map_path)
    diag = get_diagnosis_icd(module_path)
    standardize_icd(icd_map, diag, root=True)

    # Rows whose code could not be standardised have no root to match on.
    diag = diag.dropna(subset=["root"])

    # Admission ids that have at least one record in the requested category.
    is_match = diag["root"].str.contains(ICD10_code)
    matching_ids = diag.loc[is_match, "hadm_id"].unique()
    return pd.DataFrame(matching_ids, columns=["hadm_id"])