fix: missing activation func derivative + send error before update

This commit is contained in:
Lenoctambule
2026-03-29 08:23:15 +02:00
parent efd328e530
commit 09835e9afa
4 changed files with 88 additions and 44 deletions

View File

@@ -46,15 +46,15 @@ class Autoencoder:
with tqdm(bar_format="{desc} {elapsed} {rate_fmt}") as lbar:
while True:
lbar.set_description(
f"{LOADER[epoch % len(LOADER)]} Training ({epoch=} error={prev_error:.2f})", # noqa
f"{LOADER[epoch % len(LOADER)]} Training ({epoch=} error={float(prev_error):.6f})", # noqa
)
lbar.update()
error = 0
for x in data_set:
for x in tqdm(data_set, leave=False):
error += self.train(x)
error /= len(data_set)
derror = prev_error - error
if derror <= 0 or abs(derror) < 1e-8:
if derror <= 0 or abs(derror) < 1e-4:
no_improv += 1
else:
no_improv = 0
@@ -84,8 +84,10 @@ class Autoencoder:
return out, code
def save(self, path: str):
path = path.removesuffix('.npy')
np.save(path, self)
def load(path: str) -> 'Autoencoder':
path = path.removesuffix('.npy') + '.npy'
data = np.load(path, allow_pickle=True)
return data.item()

View File

@@ -1,6 +1,6 @@
import numpy as np
import types
from utils import regularize
from utils import normalize
class NNLayer:
@@ -12,28 +12,34 @@ class NNLayer:
self.W = np.random.uniform(-1, 1, (in_size, out_size))
self.B = np.zeros((out_size))
self.lr = lr
self.last_input = None
self.last_output = None
self.input = None
self.output = None
self.output_linear = None
self.activation_func = activation_func
def forward(self, V: np.ndarray) -> np.ndarray:
self.last_input = V
res = V @ self.W + self.B
self.last_output = regularize(self.activation_func(res))
return self.last_output
self.input = normalize(V)
self.output_linear = self.input @ self.W + self.B
self.output = self.activation_func(
self.output_linear
)
return self.output
def backprop(self, error: np.ndarray) -> np.ndarray:
dW = np.outer(self.last_input, error)
self.W -= self.lr * dW
self.B -= self.lr * error
return error @ self.W.T
error *= self.activation_func(self.output_linear, True)
ret = self.W @ error
dW = np.outer(self.input, error) * self.lr
dB = error * self.lr
self.W -= dW
self.B -= dB
return ret
class DeepNNLayer:
def __init__(self,
layers: list[int],
lr: float,
activation_func):
activation_func: types.FunctionType):
self.layers: list[NNLayer] = []
for i in range(len(layers) - 1):
self.layers.append(
@@ -45,13 +51,11 @@ class DeepNNLayer:
)
def forward(self, v: np.ndarray) -> np.ndarray:
v_i = v
for layer in self.layers:
v_i = layer.forward(v_i)
return v_i
v = layer.forward(v)
return v
def backprop(self, error: np.ndarray) -> np.ndarray:
error_i = error
for layer in self.layers[::-1]:
error_i = layer.backprop(error_i)
return error_i
error = layer.backprop(error)
return error

View File

@@ -1,7 +1,7 @@
import matplotlib.pyplot as plt
import numpy as np
from autoencoder import Autoencoder
from utils import relu
from utils import relu, regularize
def load_mnist() -> list[np.ndarray]:
@@ -18,7 +18,7 @@ def load_mnist() -> list[np.ndarray]:
def mnist_train(
bottleneck: int,
filename: str,
max_epoch: int,
patience: int,
):
@@ -29,9 +29,9 @@ def mnist_train(
x_train = x_train / 255
x_test = x_test / 255
autoencoder = Autoencoder(
[in_len, bottleneck],
[bottleneck, in_len],
0.1,
[in_len, 64, 16],
[16, 64, in_len],
0.01,
relu
)
autoencoder.train_dataset(
@@ -39,24 +39,39 @@ def mnist_train(
max_epoch,
patience,
display_loss=True)
autoencoder.save("autoencoder_mnist")
autoencoder.save(filename)
def mnist_test():
x_train, _, x_test, _ = load_mnist()
def mnist_test(filename: str):
x_train, _, x_test, y_test = load_mnist()
in_len = x_train[0].shape[0] * x_train[0].shape[0]
img_shape = x_train[0].shape
x_train.resize(x_train.shape[0], in_len)
x_test.resize(x_test.shape[0], in_len)
x_train = x_train / 255
x_test = x_test / 255
autoencoder = Autoencoder.load('autoencoder_mnist.npy')
example: np.ndarray = x_test[np.random.randint(0, len(x_test))]
output, _ = autoencoder.forward(example.flatten())
plt.subplot(1, 2, 1)
plt.matshow(example.reshape(img_shape), fignum=False)
plt.subplot(1, 2, 2)
plt.matshow(output.reshape(img_shape), fignum=False)
autoencoder: Autoencoder = Autoencoder.load(filename)
for i in autoencoder.encoder.layers:
print(len(i.input), len(i.output))
idx = np.random.randint(0, len(x_test))
example: np.ndarray = x_test[idx]
output, code = autoencoder.forward(example.flatten())
output = regularize(output)
plt.subplot(1, 3, 1)
plt.matshow(
example.reshape(img_shape),
fignum=False)
plt.title(f"Input ({y_test[idx]})")
plt.subplot(1, 3, 2)
plt.matshow(
output.reshape(img_shape),
fignum=False)
plt.title(f"Output ({y_test[idx]})")
plt.subplot(1, 3, 3)
s = int(np.ceil(np.sqrt(code.shape[0])))
code.resize((s, s), refcheck=False)
plt.matshow(code, fignum=False)
plt.title(f"Code ({y_test[idx]})")
plt.show()
@@ -65,13 +80,34 @@ if __name__ == "__main__":
import sys
parser = argparse.ArgumentParser()
parser.add_argument('-b', type=int, nargs='?', default=50)
parser.add_argument('-e', type=int, nargs='?', default=1000)
parser.add_argument('-p', type=int, nargs='?', default=5)
parser.add_argument('-r', action='store_true')
parser.add_argument(
'-e',
type=int,
nargs='?',
default=1000,
help='Max epochs'
)
parser.add_argument(
'-p',
type=int,
nargs='?',
default=5,
help='Patience'
)
parser.add_argument(
'-m',
type=str, nargs='?',
default='autoencoder_mnist.npy',
help='Model filename to save in run mode or load in training mode'
)
parser.add_argument(
'-r',
action='store_true',
help='Run mode'
)
args = parser.parse_args(sys.argv[1:])
if args.r:
mnist_test()
mnist_test(args.m)
else:
mnist_train(args.b, args.e, args.p)
mnist_test()
mnist_train(args.m, args.e, args.p)
mnist_test(args.m)

View File

@@ -9,7 +9,9 @@ def softmax(v: np.ndarray) -> np.ndarray:
return exp_v / np.sum(exp_v)
def relu(x: np.ndarray) -> np.ndarray:
def relu(x: np.ndarray, derivative=False) -> np.ndarray:
if derivative:
return x > 0
return x * (x > 0)