Regression of a binary vector to obtain another binary vector

I am working in the security field. I have a dataset, called X, of binary victors. So the sample is a binary vector, for example

sample in X = [1,0,0,0,1,0,…n] where the n=32

Each sample in X have its Y. where the Y is a dataset of binary vectors. Where each sample is a binary vector composed of m cases:

sample in Y = [0,1,1,0,1,0,…n] where the n=16

I want to build and learn a machine learning model, learn to do regression. I mean the model take X as input and regression Y. Scince I am working in security field I want to ensure that the predicted binary vector should be very close to the right one, with no flap in bits.

I should use an autoencoder model. Because I want later to generate new data (New X and Y). But now I am focusing in prediction. I want the predict Test vector should be correct 90% or 100%. I accept only 1 bit error.

I have build a model but the model always wrong in protecting the Y and did mistake in 5 or 6 bits.

This is my code:

from keras.layers import Lambda, Input, Dense, Dropout, BatchNormalization
from keras.models import Model
from keras import backend as K

import numpy as np
import matplotlib.pyplot as plt
import tensorflow as tf

from sklearn import set_config

#Set the random seed for consistent results
import random
#clear session for each run

#Load digits data

X, Y_original =X, Y


# reparameterization trick
# instead of sampling from Q(z|X), sample eps = N(0,I)
# z = z_mean + sqrt(var)*eps
def sampling(args):
    z_mean, z_log_var = args
    batch = K.shape(z_mean)[0]
    dim = K.int_shape(z_mean)[1]
    # by default, random_normal has mean=0 and std=1.0
    epsilon = K.random_normal(shape=(batch, dim))
    thre = K.random_uniform(shape=(batch,1))
    return z_mean + K.exp(0.5 * z_log_var) * epsilon

# Define VAE model components
intermediate_dim = 32 // 1
latent_dim = 32

# Encoder network
inputs_x = Input(shape=input_dim, name='encoder_input')
inputs_x_dropout = Dropout(0.25)(inputs_x)
inputs_x_dropout = Dense(1024, activation='relu')(inputs_x)
inputs_x_dropout = BatchNormalization()(inputs_x_dropout)
inputs_x_dropout = Dense(512, activation='relu')(inputs_x_dropout)
inputs_x_dropout = BatchNormalization()(inputs_x_dropout)
inputs_x_dropout = Dense(224, activation='relu')(inputs_x_dropout)
inputs_x_dropout = BatchNormalization()(inputs_x_dropout)
inter_x1 = Dense(128, activation='relu')(inputs_x_dropout)
inter_x2 = Dense(intermediate_dim, activation='relu')(inter_x1)

z_mean = Dense(latent_dim, name='z_mean')(inter_x2)
z_log_var = Dense(latent_dim, name='z_log_var')(inter_x2)
z = Lambda(sampling, output_shape=(latent_dim,), name='z')([z_mean, z_log_var])
encoder = Model(inputs_x, [z_mean, z_log_var, z], name='encoder')

# Decoder network for reconstruction
latent_inputs = Input(shape=(latent_dim,), name='z_sampling')
inter_y1 = Dense(intermediate_dim, activation='relu')(latent_inputs)
inter_y1 = Dense(224, activation='relu')(inter_y1)
inter_y1 = BatchNormalization()(inter_y1)
inter_y1 = Dense(512, activation='relu')(inter_y1)
inter_y1 = BatchNormalization()(inter_y1)
inter_y1 = Dense(1024, activation='relu')(inter_y1)
inter_y1 = BatchNormalization()(inter_y1)
inter_y2 = Dense(128, activation='relu')(inter_y1)
outputs_reconstruction = Dense(input_dim, activation='sigmoid')(inter_y2)
decoder = Model(latent_inputs, outputs_reconstruction, name='decoder')

# Separate network for multilabel indicator prediction from inter_y2
from keras.models import Sequential
from keras.layers import Dense
from keras.optimizers import Adam
latent_inputs = Input(shape=(latent_dim,), name='z_sampling')

latent_input_for_predictor = Input(shape=(latent_dim,))
x = Dense(64, activation='relu')(latent_input_for_predictor)
x = BatchNormalization()(x)
predictor_output = Dense(16, activation='linear')(x)  # 16 binary cases

# Create and compile the predictor model
predictor = Model(inputs=latent_input_for_predictor, outputs=predictor_output)
optimizer = Adam(learning_rate=0.001)
predictor.compile(loss='mean_squared_error', optimizer=optimizer, metrics=['mse'])
decoder.compile(loss='binary_crossentropy', optimizer=optimizer )

# Train the models
# Assuming X, Y, XX, YY are properly defined and preprocessed
latent_representations = encoder.predict(X)[2]

#history_prediction =, Y, epochs=200, batch_size=32, shuffle=True, validation_data=(XX, YY))

#history_reconstruction =, X, epochs=200, batch_size=32, shuffle=True, validation_data=(XX, XX))

# Instantiate VAE model with two outputs
outputs_vae = [decoder(z), predictor(z)]
vae = Model(inputs_x, outputs_vae, name='vae_mlp')
vae.compile(optimizer='nadam', loss='binary_crossentropy',metrics='mse')

# Train the model
val_size = 360 #20% val size

from collections import defaultdict
metrics = defaultdict(list)

plt.figure(figsize=(12, 4))

plt.subplot(1, 2, 1)
plt.plot(history_reconstruction.history['loss'], label='Decoder Training Loss')
plt.plot(history_reconstruction.history['val_loss'], label='Decoder Validation Loss')
plt.title('Decoder Loss')

plt.subplot(1, 2, 2)
plt.plot(history_prediction.history['loss'], label='Predictor Training Loss')
plt.plot(history_prediction.history['val_loss'], label='Predictor Validation Loss')
plt.title('Predictor Loss')
plt.legend()"BrmEnco_Updated.h5", overwrite=True)"BrmDeco_Updated.h5", overwrite=True)"BrmPred_Updated.h5", overwrite=True)"BrmAut_Updated.h5", overwrite=True)

Please help me