fix: missing activation func derivative + send error before update

Author: Lenoctambule
Date: 2026-03-29 08:23:15 +02:00
Parent: efd328e530
Commit: 09835e9afa
4 changed files with 88 additions and 44 deletions

View File

@@ -46,15 +46,15 @@ class Autoencoder:
         with tqdm(bar_format="{desc} {elapsed} {rate_fmt}") as lbar:
             while True:
                 lbar.set_description(
-                    f"{LOADER[epoch % len(LOADER)]} Training ({epoch=} error={prev_error:.2f})", # noqa
+                    f"{LOADER[epoch % len(LOADER)]} Training ({epoch=} error={float(prev_error):.6f}", # noqa
                 )
                 lbar.update()
                 error = 0
-                for x in data_set:
+                for x in tqdm(data_set, leave=False):
                     error += self.train(x)
                 error /= len(data_set)
                 derror = prev_error - error
-                if derror <= 0 or abs(derror) < 1e-8:
+                if derror <= 0 or abs(derror) < 1e-4:
                     no_improv += 1
                 else:
                     no_improv = 0
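
For context, the loop above is patience-based early stopping: with the threshold raised from 1e-8 to 1e-4, an epoch only counts as an improvement when the mean reconstruction error drops by at least 1e-4, and training stops once no_improv reaches the patience limit. A minimal standalone sketch of that stopping rule, with a hypothetical train_epoch() callable standing in for the per-sample loop:

def train_until_plateau(train_epoch, patience: int, tol: float = 1e-4,
                        max_epoch: int = 1000) -> float:
    # train_epoch() is assumed to return the mean error over the data set
    prev_error = float("inf")
    no_improv = 0
    for epoch in range(max_epoch):
        error = train_epoch()
        derror = prev_error - error           # positive when the error went down
        if derror <= 0 or abs(derror) < tol:  # regression or plateau
            no_improv += 1
        else:
            no_improv = 0
        if no_improv >= patience:             # give up after `patience` flat epochs
            break
        prev_error = error
    return prev_error
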
@@ -84,8 +84,10 @@ class Autoencoder:
         return out, code

     def save(self, path: str):
+        path = path.removesuffix('.npy')
         np.save(path, self)

     def load(path: str) -> 'Autoencoder':
+        path = path.removesuffix('.npy') + '.npy'
         data = np.load(path, allow_pickle=True)
         return data.item()
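
The suffix handling makes save and load symmetric around np.save, which always appends .npy on its own: save strips the suffix so "model.npy" does not turn into "model.npy.npy", and load strips then re-adds it so either spelling opens the same file. A small round-trip sketch, assuming the constructor arguments used by the MNIST script further down (layer sizes, learning rate, activation):

from autoencoder import Autoencoder
from utils import relu

ae = Autoencoder([784, 64, 16], [16, 64, 784], 0.01, relu)
ae.save("autoencoder_mnist")            # written as autoencoder_mnist.npy
ae.save("autoencoder_mnist.npy")        # same file, the suffix is stripped first
restored = Autoencoder.load("autoencoder_mnist")      # '.npy' is appended before np.load
restored = Autoencoder.load("autoencoder_mnist.npy")  # equivalent
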

View File

@@ -1,6 +1,6 @@
 import numpy as np
 import types
-from utils import regularize
+from utils import normalize


 class NNLayer:
@@ -12,28 +12,34 @@ class NNLayer:
         self.W = np.random.uniform(-1, 1, (in_size, out_size))
         self.B = np.zeros((out_size))
         self.lr = lr
-        self.last_input = None
-        self.last_output = None
+        self.input = None
+        self.output = None
+        self.output_linear = None
         self.activation_func = activation_func

     def forward(self, V: np.ndarray) -> np.ndarray:
-        self.last_input = V
-        res = V @ self.W + self.B
-        self.last_output = regularize(self.activation_func(res))
-        return self.last_output
+        self.input = normalize(V)
+        self.output_linear = self.input @ self.W + self.B
+        self.output = self.activation_func(
+            self.output_linear
+        )
+        return self.output

     def backprop(self, error: np.ndarray) -> np.ndarray:
-        dW = np.outer(self.last_input, error)
-        self.W -= self.lr * dW
-        self.B -= self.lr * error
-        return error @ self.W.T
+        error *= self.activation_func(self.output_linear, True)
+        ret = self.W @ error
+        dW = np.outer(self.input, error) * self.lr
+        dB = error * self.lr
+        self.W -= dW
+        self.B -= dB
+        return ret


 class DeepNNLayer:
     def __init__(self,
                  layers: list[int],
                  lr: float,
-                 activation_func):
+                 activation_func: types.FunctionType):
         self.layers: list[NNLayer] = []
         for i in range(len(layers) - 1):
             self.layers.append(
@@ -45,13 +51,11 @@ class DeepNNLayer:
             )

     def forward(self, v: np.ndarray) -> np.ndarray:
-        v_i = v
         for layer in self.layers:
-            v_i = layer.forward(v_i)
-        return v_i
+            v = layer.forward(v)
+        return v

     def backprop(self, error: np.ndarray) -> np.ndarray:
-        error_i = error
         for layer in self.layers[::-1]:
-            error_i = layer.backprop(error_i)
-        return error_i
+            error = layer.backprop(error)
+        return error
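
This hunk carries both halves of the commit title. The old backprop skipped the activation derivative entirely, treating the layer as linear, and it returned the upstream error only after self.W had already been updated, so earlier layers received gradients taken against the wrong weights. The new version scales the incoming error by activation_func(self.output_linear, True), computes ret = self.W @ error before any update, and only then applies dW and dB. A standalone finite-difference check of that gradient formula, using the two-argument relu from the end of this commit (a sketch of the math only, not the repository's NNLayer, so the normalize call on the input is left out):

import numpy as np

def relu(x, derivative=False):
    if derivative:
        return x > 0
    return x * (x > 0)

rng = np.random.default_rng(0)
x = rng.normal(size=5)                 # layer input
W = rng.normal(size=(5, 3))            # weights, shape (in_size, out_size)
B = np.zeros(3)
target = rng.normal(size=3)

def loss(W):
    return 0.5 * np.sum((relu(x @ W + B) - target) ** 2)

# Analytic gradient, mirroring the fixed backprop:
# scale the output error by the activation derivative, then outer(input, error).
z = x @ W + B
error = (relu(z) - target) * relu(z, True)
dW = np.outer(x, error)
upstream = W @ error                   # error to send back, taken before any weight update

# Finite-difference check of a single weight entry.
eps = 1e-6
Wp, Wm = W.copy(), W.copy()
Wp[2, 1] += eps
Wm[2, 1] -= eps
print(dW[2, 1], (loss(Wp) - loss(Wm)) / (2 * eps))   # the two values should agree closely
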

View File

@@ -1,7 +1,7 @@
 import matplotlib.pyplot as plt
 import numpy as np
 from autoencoder import Autoencoder
-from utils import relu
+from utils import relu, regularize


 def load_mnist() -> list[np.ndarray]:
@@ -18,7 +18,7 @@ def load_mnist() -> list[np.ndarray]:
 def mnist_train(
-        bottleneck: int,
+        filename: str,
         max_epoch: int,
         patience: int,
 ):
@@ -29,9 +29,9 @@ def mnist_train(
     x_train = x_train / 255
     x_test = x_test / 255
     autoencoder = Autoencoder(
-        [in_len, bottleneck],
-        [bottleneck, in_len],
-        0.1,
+        [in_len, 64, 16],
+        [16, 64, in_len],
+        0.01,
         relu
     )
     autoencoder.train_dataset(
@@ -39,24 +39,39 @@ def mnist_train(
         max_epoch,
         patience,
         display_loss=True)
-    autoencoder.save("autoencoder_mnist")
+    autoencoder.save(filename)


-def mnist_test():
-    x_train, _, x_test, _ = load_mnist()
+def mnist_test(filename: str):
+    x_train, _, x_test, y_test = load_mnist()
     in_len = x_train[0].shape[0] * x_train[0].shape[0]
     img_shape = x_train[0].shape
     x_train.resize(x_train.shape[0], in_len)
     x_test.resize(x_test.shape[0], in_len)
     x_train = x_train / 255
     x_test = x_test / 255
-    autoencoder = Autoencoder.load('autoencoder_mnist.npy')
-    example: np.ndarray = x_test[np.random.randint(0, len(x_test))]
-    output, _ = autoencoder.forward(example.flatten())
-    plt.subplot(1, 2, 1)
-    plt.matshow(example.reshape(img_shape), fignum=False)
-    plt.subplot(1, 2, 2)
-    plt.matshow(output.reshape(img_shape), fignum=False)
+    autoencoder: Autoencoder = Autoencoder.load(filename)
+    for i in autoencoder.encoder.layers:
+        print(len(i.input), len(i.output))
+    idx = np.random.randint(0, len(x_test))
+    example: np.ndarray = x_test[idx]
+    output, code = autoencoder.forward(example.flatten())
+    output = regularize(output)
+    plt.subplot(1, 3, 1)
+    plt.matshow(
+        example.reshape(img_shape),
+        fignum=False)
+    plt.title(f"Input ({y_test[idx]})")
+    plt.subplot(1, 3, 2)
+    plt.matshow(
+        output.reshape(img_shape),
+        fignum=False)
+    plt.title(f"Output ({y_test[idx]})")
+    plt.subplot(1, 3, 3)
+    s = int(np.ceil(np.sqrt(code.shape[0])))
+    code.resize((s, s), refcheck=False)
+    plt.matshow(code, fignum=False)
+    plt.title(f"Code ({y_test[idx]})")
     plt.show()
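
The three lines added before plt.show() reshape the 1-D code vector into the smallest square that holds it so matshow can draw it as an image; ndarray.resize(..., refcheck=False) pads the extra cells with zeros whenever the code length is not a perfect square. With the 16-unit bottleneck configured above the grid is exactly 4x4. A quick check of that behaviour in plain NumPy:

import numpy as np

code = np.arange(16, dtype=float)        # 16-unit bottleneck -> s = 4, no padding
s = int(np.ceil(np.sqrt(code.shape[0])))
code.resize((s, s), refcheck=False)
print(code.shape)                        # (4, 4)

code = np.arange(10, dtype=float)        # non-square length -> s = 4, six zero-padded cells
s = int(np.ceil(np.sqrt(code.shape[0])))
code.resize((s, s), refcheck=False)
print(code.shape, code.ravel()[-6:])     # (4, 4) [0. 0. 0. 0. 0. 0.]
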
@@ -65,13 +80,34 @@ if __name__ == "__main__":
     import sys

     parser = argparse.ArgumentParser()
-    parser.add_argument('-b', type=int, nargs='?', default=50)
-    parser.add_argument('-e', type=int, nargs='?', default=1000)
-    parser.add_argument('-p', type=int, nargs='?', default=5)
-    parser.add_argument('-r', action='store_true')
+    parser.add_argument(
+        '-e',
+        type=int,
+        nargs='?',
+        default=1000,
+        help='Max epochs'
+    )
+    parser.add_argument(
+        '-p',
+        type=int,
+        nargs='?',
+        default=5,
+        help='Patience'
+    )
+    parser.add_argument(
+        '-m',
+        type=str, nargs='?',
+        default='autoencoder_mnist.npy',
+        help='Model filename to save in run mode or load in training mode'
+    )
+    parser.add_argument(
+        '-r',
+        action='store_true',
+        help='Run mode'
+    )
     args = parser.parse_args(sys.argv[1:])
     if args.r:
-        mnist_test()
+        mnist_test(args.m)
     else:
-        mnist_train(args.b, args.e, args.p)
-        mnist_test()
+        mnist_train(args.m, args.e, args.p)
+        mnist_test(args.m)
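
With the -m flag the model path is explicit in both modes: training saves to it, and run mode (-r) loads from it. Assuming the entry point above is named mnist.py (its filename is not visible in this view), a training run would be python mnist.py -e 1000 -p 5 -m autoencoder_mnist.npy, and python mnist.py -r -m autoencoder_mnist.npy would reload the saved model and display the input, reconstruction, and code panels without retraining.
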

View File

@@ -9,7 +9,9 @@ def softmax(v: np.ndarray) -> np.ndarray:
     return exp_v / np.sum(exp_v)


-def relu(x: np.ndarray) -> np.ndarray:
+def relu(x: np.ndarray, derivative=False) -> np.ndarray:
+    if derivative:
+        return x > 0
     return x * (x > 0)
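
relu(x, True) returns a boolean mask (1 where x > 0, else 0, taking the subgradient at 0 to be 0), which NumPy promotes to numbers when NNLayer.backprop multiplies it into the float error vector. Any other activation only has to follow the same two-in-one convention. A hypothetical sigmoid written that way, as an illustration rather than something present in this repository:

import numpy as np

def sigmoid(x: np.ndarray, derivative=False) -> np.ndarray:
    s = 1 / (1 + np.exp(-x))
    if derivative:
        return s * (1 - s)   # the derivative is expressed through the forward value
    return s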