From 09835e9afa5cd51b0b1e53eb309e8f217fbda853 Mon Sep 17 00:00:00 2001 From: Lenoctambule <106790775+lenoctambule@users.noreply.github.com> Date: Sun, 29 Mar 2026 08:23:15 +0200 Subject: [PATCH] fix: missing activation func derivative + send error before update --- autoencoder.py | 8 +++-- layers.py | 40 +++++++++++++------------ mnist_test.py | 80 ++++++++++++++++++++++++++++++++++++-------------- utils.py | 4 ++- 4 files changed, 88 insertions(+), 44 deletions(-) diff --git a/autoencoder.py b/autoencoder.py index 5ddd745..2bbbfc2 100644 --- a/autoencoder.py +++ b/autoencoder.py @@ -46,15 +46,15 @@ class Autoencoder: with tqdm(bar_format="{desc} {elapsed} {rate_fmt}") as lbar: while True: lbar.set_description( - f"{LOADER[epoch % len(LOADER)]} Training ({epoch=} error={prev_error:.2f})", # noqa + f"{LOADER[epoch % len(LOADER)]} Training ({epoch=} error={float(prev_error):.6f})", # noqa ) lbar.update() error = 0 - for x in data_set: + for x in tqdm(data_set, leave=False): error += self.train(x) error /= len(data_set) derror = prev_error - error - if derror <= 0 or abs(derror) < 1e-8: + if derror <= 0 or abs(derror) < 1e-4: no_improv += 1 else: no_improv = 0 @@ -84,8 +84,10 @@ class Autoencoder: return out, code def save(self, path: str): + path = path.removesuffix('.npy') np.save(path, self) def load(path: str) -> 'Autoencoder': + path = path.removesuffix('.npy') + '.npy' data = np.load(path, allow_pickle=True) return data.item() diff --git a/layers.py b/layers.py index 07d3421..1c15247 100644 --- a/layers.py +++ b/layers.py @@ -1,6 +1,6 @@ import numpy as np import types -from utils import regularize +from utils import normalize class NNLayer: @@ -12,28 +12,34 @@ class NNLayer: self.W = np.random.uniform(-1, 1, (in_size, out_size)) self.B = np.zeros((out_size)) self.lr = lr - self.last_input = None - self.last_output = None + self.input = None + self.output = None + self.output_linear = None self.activation_func = activation_func def forward(self, V: np.ndarray) -> 
np.ndarray: - self.last_input = V - res = V @ self.W + self.B - self.last_output = regularize(self.activation_func(res)) - return self.last_output + self.input = normalize(V) + self.output_linear = self.input @ self.W + self.B + self.output = self.activation_func( + self.output_linear + ) + return self.output def backprop(self, error: np.ndarray) -> np.ndarray: - dW = np.outer(self.last_input, error) - self.W -= self.lr * dW - self.B -= self.lr * error - return error @ self.W.T + error *= self.activation_func(self.output_linear, True) + ret = self.W @ error + dW = np.outer(self.input, error) * self.lr + dB = error * self.lr + self.W -= dW + self.B -= dB + return ret class DeepNNLayer: def __init__(self, layers: list[int], lr: float, - activation_func): + activation_func: types.FunctionType): self.layers: list[NNLayer] = [] for i in range(len(layers) - 1): self.layers.append( @@ -45,13 +51,11 @@ class DeepNNLayer: ) def forward(self, v: np.ndarray) -> np.ndarray: - v_i = v for layer in self.layers: - v_i = layer.forward(v_i) - return v_i + v = layer.forward(v) + return v def backprop(self, error: np.ndarray) -> np.ndarray: - error_i = error for layer in self.layers[::-1]: - error_i = layer.backprop(error_i) - return error_i + error = layer.backprop(error) + return error diff --git a/mnist_test.py b/mnist_test.py index db1f9a2..e5fa485 100644 --- a/mnist_test.py +++ b/mnist_test.py @@ -1,7 +1,7 @@ import matplotlib.pyplot as plt import numpy as np from autoencoder import Autoencoder -from utils import relu +from utils import relu, regularize def load_mnist() -> list[np.ndarray]: @@ -18,7 +18,7 @@ def load_mnist() -> list[np.ndarray]: def mnist_train( - bottleneck: int, + filename: str, max_epoch: int, patience: int, ): @@ -29,9 +29,9 @@ def mnist_train( x_train = x_train / 255 x_test = x_test / 255 autoencoder = Autoencoder( - [in_len, bottleneck], - [bottleneck, in_len], - 0.1, + [in_len, 64, 16], + [16, 64, in_len], + 0.01, relu ) autoencoder.train_dataset( @@ 
-39,24 +39,39 @@ def mnist_train( max_epoch, patience, display_loss=True) - autoencoder.save("autoencoder_mnist") + autoencoder.save(filename) -def mnist_test(): - x_train, _, x_test, _ = load_mnist() +def mnist_test(filename: str): + x_train, _, x_test, y_test = load_mnist() in_len = x_train[0].shape[0] * x_train[0].shape[0] img_shape = x_train[0].shape x_train.resize(x_train.shape[0], in_len) x_test.resize(x_test.shape[0], in_len) x_train = x_train / 255 x_test = x_test / 255 - autoencoder = Autoencoder.load('autoencoder_mnist.npy') - example: np.ndarray = x_test[np.random.randint(0, len(x_test))] - output, _ = autoencoder.forward(example.flatten()) - plt.subplot(1, 2, 1) - plt.matshow(example.reshape(img_shape), fignum=False) - plt.subplot(1, 2, 2) - plt.matshow(output.reshape(img_shape), fignum=False) + autoencoder: Autoencoder = Autoencoder.load(filename) + for i in autoencoder.encoder.layers: + print(len(i.input), len(i.output)) + idx = np.random.randint(0, len(x_test)) + example: np.ndarray = x_test[idx] + output, code = autoencoder.forward(example.flatten()) + output = regularize(output) + plt.subplot(1, 3, 1) + plt.matshow( + example.reshape(img_shape), + fignum=False) + plt.title(f"Input ({y_test[idx]})") + plt.subplot(1, 3, 2) + plt.matshow( + output.reshape(img_shape), + fignum=False) + plt.title(f"Output ({y_test[idx]})") + plt.subplot(1, 3, 3) + s = int(np.ceil(np.sqrt(code.shape[0]))) + code.resize((s, s), refcheck=False) + plt.matshow(code, fignum=False) + plt.title(f"Code ({y_test[idx]})") plt.show() @@ -65,13 +80,34 @@ if __name__ == "__main__": import sys parser = argparse.ArgumentParser() - parser.add_argument('-b', type=int, nargs='?', default=50) - parser.add_argument('-e', type=int, nargs='?', default=1000) - parser.add_argument('-p', type=int, nargs='?', default=5) - parser.add_argument('-r', action='store_true') + parser.add_argument( + '-e', + type=int, + nargs='?', + default=1000, + help='Max epochs' + ) + parser.add_argument( + '-p', + 
type=int, + nargs='?', + default=5, + help='Patience' + ) + parser.add_argument( + '-m', + type=str, nargs='?', + default='autoencoder_mnist.npy', + help='Model filename to save in run mode or load in training mode' + ) + parser.add_argument( + '-r', + action='store_true', + help='Run mode' + ) args = parser.parse_args(sys.argv[1:]) if args.r: - mnist_test() + mnist_test(args.m) else: - mnist_train(args.b, args.e, args.p) - mnist_test() + mnist_train(args.m, args.e, args.p) + mnist_test(args.m) diff --git a/utils.py b/utils.py index 1bcf986..f3b2413 100644 --- a/utils.py +++ b/utils.py @@ -9,7 +9,9 @@ def softmax(v: np.ndarray) -> np.ndarray: return exp_v / np.sum(exp_v) -def relu(x: np.ndarray) -> np.ndarray: +def relu(x: np.ndarray, derivative=False) -> np.ndarray: + if derivative: + return x > 0 return x * (x > 0)