# Read the raw spreadsheet into a DataFrame.
df = pd.read_excel('/path/to/Neoadj_base_luminaux - anonymise.xls')

# Build a new frame with the unwanted columns removed (drop() returns a new
# object, so `df` itself is left untouched).
# NOTE(review): the label list is written as `[...]`, i.e. a list holding only
# Ellipsis -- presumably a placeholder for the real column names; confirm.
df1 = df.drop(columns=[...])

# Recode the letters in 'sata_n' as integers: A -> 0, B -> 1, C -> 2, D -> 3.
# replace() only touches values that are keys of the mapping.
sata_n_map = {letter: code for code, letter in enumerate('ABCD')}
df1['sata_n'] = df1['sata_n'].replace(to_replace=sata_n_map)

# Keep only the rows whose 'chim_typ' value is 1, 2 or 5.
_chim_typ_mask = df1['chim_typ'].isin([1, 2, 5])
filtered_df = df1.loc[_chim_typ_mask]


def _renumber_rows(group):
    """Return *group* with a fresh 0-based index, discarding the old one."""
    return group.reset_index(drop=True)


# Split the filtered rows by 'anap_kc' and renumber the rows inside each group.
grouped_df = filtered_df.groupby('anap_kc').apply(_renumber_rows)

# Read the additional feature table from CSV.
X_df = pd.read_csv('/path/to/perso_mec_imp.csv')