From 09835e9afa5cd51b0b1e53eb309e8f217fbda853 Mon Sep 17 00:00:00 2001 From: Lenoctambule <106790775+lenoctambule@users.noreply.github.com> Date: Sun, 29 Mar 2026 08:23:15 +0200 Subject: [PATCH] fix: missing activation func derivative + send error before update --- autoencoder.py | 8 +++-- layers.py | 40 +++++++++++++------------ mnist_test.py | 80 ++++++++++++++++++++++++++++++++++++-------------- utils.py | 4 ++- 4 files changed, 88 insertions(+), 44 deletions(-) diff --git a/autoencoder.py b/autoencoder.py index 5ddd745..2bbbfc2 100644 --- a/autoencoder.py +++ b/autoencoder.py @@ -46,15 +46,15 @@ class Autoencoder: with tqdm(bar_format="{desc} {elapsed} {rate_fmt}") as lbar: while True: lbar.set_description( - f"{LOADER[epoch % len(LOADER)]} Training ({epoch=} error={prev_error:.2f})", # noqa + f"{LOADER[epoch % len(LOADER)]} Training ({epoch=} error={float(prev_error):.6f})", # noqa ) lbar.update() error = 0 - for x in data_set: + for x in tqdm(data_set, leave=False): error += self.train(x) error /= len(data_set) derror = prev_error - error - if derror <= 0 or abs(derror) < 1e-8: + if derror <= 0 or abs(derror) < 1e-4: no_improv += 1 else: no_improv = 0 @@ -84,8 +84,10 @@ class Autoencoder: return out, code def save(self, path: str): + path = path.removesuffix('.npy') np.save(path, self) def load(path: str) -> 'Autoencoder': + path = path.removesuffix('.npy') + '.npy' data = np.load(path, allow_pickle=True) return data.item() diff --git a/layers.py b/layers.py index 07d3421..1c15247 100644 --- a/layers.py +++ b/layers.py @@ -1,6 +1,6 @@ import numpy as np import types -from utils import regularize +from utils import normalize class NNLayer: @@ -12,28 +12,34 @@ class NNLayer: self.W = np.random.uniform(-1, 1, (in_size, out_size)) self.B = np.zeros((out_size)) self.lr = lr - self.last_input = None - self.last_output = None + self.input = None + self.output = None + self.output_linear = None self.activation_func = activation_func def forward(self, V: np.ndarray) -> 
np.ndarray: - self.last_input = V - res = V @ self.W + self.B - self.last_output = regularize(self.activation_func(res)) - return self.last_output + self.input = normalize(V) + self.output_linear = self.input @ self.W + self.B + self.output = self.activation_func( + self.output_linear + ) + return self.output def backprop(self, error: np.ndarray) -> np.ndarray: - dW = np.outer(self.last_input, error) - self.W -= self.lr * dW - self.B -= self.lr * error - return error @ self.W.T + error *= self.activation_func(self.output_linear, True) + ret = self.W @ error + dW = np.outer(self.input, error) * self.lr + dB = error * self.lr + self.W -= dW + self.B -= dB + return ret class DeepNNLayer: def __init__(self, layers: list[int], lr: float, - activation_func): + activation_func: types.FunctionType): self.layers: list[NNLayer] = [] for i in range(len(layers) - 1): self.layers.append( @@ -45,13 +51,11 @@ class DeepNNLayer: ) def forward(self, v: np.ndarray) -> np.ndarray: - v_i = v for layer in self.layers: - v_i = layer.forward(v_i) - return v_i + v = layer.forward(v) + return v def backprop(self, error: np.ndarray) -> np.ndarray: - error_i = error for layer in self.layers[::-1]: - error_i = layer.backprop(error_i) - return error_i + error = layer.backprop(error) + return error diff --git a/mnist_test.py b/mnist_test.py index db1f9a2..e5fa485 100644 --- a/mnist_test.py +++ b/mnist_test.py @@ -1,7 +1,7 @@ import matplotlib.pyplot as plt import numpy as np from autoencoder import Autoencoder -from utils import relu +from utils import relu, regularize def load_mnist() -> list[np.ndarray]: @@ -18,7 +18,7 @@ def load_mnist() -> list[np.ndarray]: def mnist_train( - bottleneck: int, + filename: str, max_epoch: int, patience: int, ): @@ -29,9 +29,9 @@ def mnist_train( x_train = x_train / 255 x_test = x_test / 255 autoencoder = Autoencoder( - [in_len, bottleneck], - [bottleneck, in_len], - 0.1, + [in_len, 64, 16], + [16, 64, in_len], + 0.01, relu ) autoencoder.train_dataset( @@ 
-39,24 +39,39 @@ def mnist_train( max_epoch, patience, display_loss=True) - autoencoder.save("autoencoder_mnist") + autoencoder.save(filename) -def mnist_test(): - x_train, _, x_test, _ = load_mnist() +def mnist_test(filename: str): + x_train, _, x_test, y_test = load_mnist() in_len = x_train[0].shape[0] * x_train[0].shape[0] img_shape = x_train[0].shape x_train.resize(x_train.shape[0], in_len) x_test.resize(x_test.shape[0], in_len) x_train = x_train / 255 x_test = x_test / 255 - autoencoder = Autoencoder.load('autoencoder_mnist.npy') - example: np.ndarray = x_test[np.random.randint(0, len(x_test))] - output, _ = autoencoder.forward(example.flatten()) - plt.subplot(1, 2, 1) - plt.matshow(example.reshape(img_shape), fignum=False) - plt.subplot(1, 2, 2) - plt.matshow(output.reshape(img_shape), fignum=False) + autoencoder: Autoencoder = Autoencoder.load(filename) + for i in autoencoder.encoder.layers: + print(len(i.input), len(i.output)) + idx = np.random.randint(0, len(x_test)) + example: np.ndarray = x_test[idx] + output, code = autoencoder.forward(example.flatten()) + output = regularize(output) + plt.subplot(1, 3, 1) + plt.matshow( + example.reshape(img_shape), + fignum=False) + plt.title(f"Input ({y_test[idx]})") + plt.subplot(1, 3, 2) + plt.matshow( + output.reshape(img_shape), + fignum=False) + plt.title(f"Output ({y_test[idx]})") + plt.subplot(1, 3, 3) + s = int(np.ceil(np.sqrt(code.shape[0]))) + code.resize((s, s), refcheck=False) + plt.matshow(code, fignum=False) + plt.title(f"Code ({y_test[idx]})") plt.show() @@ -65,13 +80,34 @@ if __name__ == "__main__": import sys parser = argparse.ArgumentParser() - parser.add_argument('-b', type=int, nargs='?', default=50) - parser.add_argument('-e', type=int, nargs='?', default=1000) - parser.add_argument('-p', type=int, nargs='?', default=5) - parser.add_argument('-r', action='store_true') + parser.add_argument( + '-e', + type=int, + nargs='?', + default=1000, + help='Max epochs' + ) + parser.add_argument( + '-p', + 
type=int, + nargs='?', + default=5, + help='Patience' + ) + parser.add_argument( + '-m', + type=str, nargs='?', + default='autoencoder_mnist.npy', + help='Model filename to save in run mode or load in training mode' + ) + parser.add_argument( + '-r', + action='store_true', + help='Run mode' + ) args = parser.parse_args(sys.argv[1:]) if args.r: - mnist_test() + mnist_test(args.m) else: - mnist_train(args.b, args.e, args.p) - mnist_test() + mnist_train(args.m, args.e, args.p) + mnist_test(args.m) diff --git a/utils.py b/utils.py index 1bcf986..f3b2413 100644 --- a/utils.py +++ b/utils.py @@ -9,7 +9,9 @@ def softmax(v: np.ndarray) -> np.ndarray: return exp_v / np.sum(exp_v) -def relu(x: np.ndarray) -> np.ndarray: +def relu(x: np.ndarray, derivative=False) -> np.ndarray: + if derivative: + return x > 0 return x * (x > 0)