In [1]:
get_ipython().ast_node_interactivity = 'all' import matplotlib.pyplot as plt import numpy as np import matplotlib import math from sklearn import datasets import itertools import random matplotlib.rcParams['figure.dpi'] = 150 iris = datasets.load_iris() X = iris.data Y = iris.target
In [4]:
for x, y in itertools.combinations([0, 1, 2, 3], 2): _ = plt.scatter(X[:, x], X[:, y], c=Y) _ = plt.figure()
Out:
Out:
Out:
Out:
Out:
Out:
Out:
In [6]:
X
Out [6]:
array([[5.1, 3.5, 1.4, 0.2], [4.9, 3. , 1.4, 0.2], [4.7, 3.2, 1.3, 0.2], [4.6, 3.1, 1.5, 0.2], [5. , 3.6, 1.4, 0.2], [5.4, 3.9, 1.7, 0.4], [4.6, 3.4, 1.4, 0.3], [5. , 3.4, 1.5, 0.2], [4.4, 2.9, 1.4, 0.2], [4.9, 3.1, 1.5, 0.1], [5.4, 3.7, 1.5, 0.2], [4.8, 3.4, 1.6, 0.2], [4.8, 3. , 1.4, 0.1], [4.3, 3. , 1.1, 0.1], [5.8, 4. , 1.2, 0.2], [5.7, 4.4, 1.5, 0.4], [5.4, 3.9, 1.3, 0.4], [5.1, 3.5, 1.4, 0.3], [5.7, 3.8, 1.7, 0.3], [5.1, 3.8, 1.5, 0.3], [5.4, 3.4, 1.7, 0.2], [5.1, 3.7, 1.5, 0.4], [4.6, 3.6, 1. , 0.2], [5.1, 3.3, 1.7, 0.5], [4.8, 3.4, 1.9, 0.2], [5. , 3. , 1.6, 0.2], [5. , 3.4, 1.6, 0.4], [5.2, 3.5, 1.5, 0.2], [5.2, 3.4, 1.4, 0.2], [4.7, 3.2, 1.6, 0.2], [4.8, 3.1, 1.6, 0.2], [5.4, 3.4, 1.5, 0.4], [5.2, 4.1, 1.5, 0.1], [5.5, 4.2, 1.4, 0.2], [4.9, 3.1, 1.5, 0.2], [5. , 3.2, 1.2, 0.2], [5.5, 3.5, 1.3, 0.2], [4.9, 3.6, 1.4, 0.1], [4.4, 3. , 1.3, 0.2], [5.1, 3.4, 1.5, 0.2], [5. , 3.5, 1.3, 0.3], [4.5, 2.3, 1.3, 0.3], [4.4, 3.2, 1.3, 0.2], [5. , 3.5, 1.6, 0.6], [5.1, 3.8, 1.9, 0.4], [4.8, 3. , 1.4, 0.3], [5.1, 3.8, 1.6, 0.2], [4.6, 3.2, 1.4, 0.2], [5.3, 3.7, 1.5, 0.2], [5. , 3.3, 1.4, 0.2], [7. , 3.2, 4.7, 1.4], [6.4, 3.2, 4.5, 1.5], [6.9, 3.1, 4.9, 1.5], [5.5, 2.3, 4. , 1.3], [6.5, 2.8, 4.6, 1.5], [5.7, 2.8, 4.5, 1.3], [6.3, 3.3, 4.7, 1.6], [4.9, 2.4, 3.3, 1. ], [6.6, 2.9, 4.6, 1.3], [5.2, 2.7, 3.9, 1.4], [5. , 2. , 3.5, 1. ], [5.9, 3. , 4.2, 1.5], [6. , 2.2, 4. , 1. ], [6.1, 2.9, 4.7, 1.4], [5.6, 2.9, 3.6, 1.3], [6.7, 3.1, 4.4, 1.4], [5.6, 3. , 4.5, 1.5], [5.8, 2.7, 4.1, 1. ], [6.2, 2.2, 4.5, 1.5], [5.6, 2.5, 3.9, 1.1], [5.9, 3.2, 4.8, 1.8], [6.1, 2.8, 4. , 1.3], [6.3, 2.5, 4.9, 1.5], [6.1, 2.8, 4.7, 1.2], [6.4, 2.9, 4.3, 1.3], [6.6, 3. , 4.4, 1.4], [6.8, 2.8, 4.8, 1.4], [6.7, 3. , 5. , 1.7], [6. , 2.9, 4.5, 1.5], [5.7, 2.6, 3.5, 1. ], [5.5, 2.4, 3.8, 1.1], [5.5, 2.4, 3.7, 1. ], [5.8, 2.7, 3.9, 1.2], [6. , 2.7, 5.1, 1.6], [5.4, 3. , 4.5, 1.5], [6. , 3.4, 4.5, 1.6], [6.7, 3.1, 4.7, 1.5], [6.3, 2.3, 4.4, 1.3], [5.6, 3. , 4.1, 1.3], [5.5, 2.5, 4. , 1.3], [5.5, 2.6, 4.4, 1.2], [6.1, 3. , 4.6, 1.4], [5.8, 2.6, 4. , 1.2], [5. , 2.3, 3.3, 1. ], [5.6, 2.7, 4.2, 1.3], [5.7, 3. , 4.2, 1.2], [5.7, 2.9, 4.2, 1.3], [6.2, 2.9, 4.3, 1.3], [5.1, 2.5, 3. , 1.1], [5.7, 2.8, 4.1, 1.3], [6.3, 3.3, 6. , 2.5], [5.8, 2.7, 5.1, 1.9], [7.1, 3. , 5.9, 2.1], [6.3, 2.9, 5.6, 1.8], [6.5, 3. , 5.8, 2.2], [7.6, 3. , 6.6, 2.1], [4.9, 2.5, 4.5, 1.7], [7.3, 2.9, 6.3, 1.8], [6.7, 2.5, 5.8, 1.8], [7.2, 3.6, 6.1, 2.5], [6.5, 3.2, 5.1, 2. ], [6.4, 2.7, 5.3, 1.9], [6.8, 3. , 5.5, 2.1], [5.7, 2.5, 5. , 2. ], [5.8, 2.8, 5.1, 2.4], [6.4, 3.2, 5.3, 2.3], [6.5, 3. , 5.5, 1.8], [7.7, 3.8, 6.7, 2.2], [7.7, 2.6, 6.9, 2.3], [6. , 2.2, 5. , 1.5], [6.9, 3.2, 5.7, 2.3], [5.6, 2.8, 4.9, 2. ], [7.7, 2.8, 6.7, 2. ], [6.3, 2.7, 4.9, 1.8], [6.7, 3.3, 5.7, 2.1], [7.2, 3.2, 6. , 1.8], [6.2, 2.8, 4.8, 1.8], [6.1, 3. , 4.9, 1.8], [6.4, 2.8, 5.6, 2.1], [7.2, 3. , 5.8, 1.6], [7.4, 2.8, 6.1, 1.9], [7.9, 3.8, 6.4, 2. ], [6.4, 2.8, 5.6, 2.2], [6.3, 2.8, 5.1, 1.5], [6.1, 2.6, 5.6, 1.4], [7.7, 3. , 6.1, 2.3], [6.3, 3.4, 5.6, 2.4], [6.4, 3.1, 5.5, 1.8], [6. , 3. , 4.8, 1.8], [6.9, 3.1, 5.4, 2.1], [6.7, 3.1, 5.6, 2.4], [6.9, 3.1, 5.1, 2.3], [5.8, 2.7, 5.1, 1.9], [6.8, 3.2, 5.9, 2.3], [6.7, 3.3, 5.7, 2.5], [6.7, 3. , 5.2, 2.3], [6.3, 2.5, 5. , 1.9], [6.5, 3. , 5.2, 2. ], [6.2, 3.4, 5.4, 2.3], [5.9, 3. , 5.1, 1.8]])
In [2]:
#!/usr/bin/env python3 import math from collections import namedtuple import random import numpy as np np.set_printoptions(precision=3, suppress=True) # Value class Value: __slots__ = ("value", "grad", "parents") def __init__(self, v, g = 0.0, p = tuple()): self.value = v self.grad = g self.parents = p def __repr__(self): return f"Value({self.value}, grad={self.grad})" def __float__(self): return self.value def backprop(self, bp): self.grad += bp for parent, grad in self.parents: parent.backprop(grad * bp) def backward(self): return self.backprop(1.0) def __add__(self, other): v = self.value + other.value g = 0.0 p = ((self, 1.0), (other, 1.0)) return Value(v, g, p) def __sub__(self, other): v = self.value - other.value g = 0.0 p = ((self, 1.0), (other, -1.0)) return Value(v, g, p) def __neg__(self): v = -self.value g = 0.0 p = ((self, -1.0),) return Value(v, g, p) def __mul__(self, other): v = self.value * other.value g = 0.0 p = ((self, other.value), (other, self.value)) return Value(v, g, p) def __truediv__(self, other): v = self.value / other.value g = 0.0 p = ((self, 1 / other.value), (other, (-self.value) / (other.value ** 2))) return Value(v, g, p) def __pow__(self, pow): v = self.value ** pow g = 0.0 p = ((self, pow * self.value ** (pow - 1)),) return Value(v, g, p) def __lt__(self, other): return self.value < other.value def sqrt(self): v = math.sqrt(self.value) g = 0.0 p = ((self, 1 / (2 * v)),) return Value(v, g, p) def tanh(self): v = float(np.tanh(self.value)) g = 0.0 p = ((self, 1 - v ** 2),) return Value(v, g, p) def relu(self): v = max(self.value, 0.0) g = 0.0 p = ((self, 1.0 if self.value > 0.0 else 0.0),) return Value(v, g, p) def logistic_function(self): ex = float(np.exp(self.value)) v = ex / (1 + ex) g = 0.0 p = ((self, v * (1 - v)),) return Value(v, g, p) def swish1(self): x = self.value ex = float(np.exp(x)) e2x = float(np.exp(2 * x)) v = x * ex / (1 + ex) g = 0.0 d = (x + ex + 1) * ex / (e2x + 2 * ex + 1) p = ((self, d),) return Value(v, g, p)
In [3]:
In [3]:
def dist(v1, v2): distsq = Value(0) for x, y in zip(v1, v2): distsq += (x - y) ** 2 return distsq.sqrt()
In [19]:
x = [Value(1), Value(2), Value(3)] y = [Value(1), Value(4), Value(4)] distsq(x, y)
Out [19]:
Value(5, grad=0.0)
In [4]:
def normalize(x): return (x - np.mean(x)) / np.std(x) for i in range(4): X[:, i] = normalize(X[:, i])
In [5]:
for x, y in itertools.combinations([0, 1, 2, 3], 2): _ = plt.scatter(X[:, x], X[:, y], c=Y) _ = plt.figure()
Out:
Out:
Out:
Out:
Out:
Out:
Out:
In [6]:
features = [] for row in range(150): v = [] for i in range(4): val = Value(float(X[row][i])) v.append(val) features.append(v) features[0] X = features
Out [6]:
[Value(-0.9006811702978088, grad=0.0), Value(1.019004351971607, grad=0.0), Value(-1.3402265266227624, grad=0.0), Value(-1.3154442950077403, grad=0.0)]
In []:
gr
In [7]:
GRID_SIZE = 10 MAXDIST = np.linalg.norm(np.array([0, 0]) - np.array([GRID_SIZE, GRID_SIZE])) grid = [] for y in range(GRID_SIZE): for x in range(GRID_SIZE): v = [Value(random.uniform(-1, 1)) for _ in range(4)] grid.append(v) grid[0]
Out [7]:
[Value(0.10152844980007547, grad=0.0), Value(-0.5539298274835704, grad=0.0), Value(-0.22458789388172984, grad=0.0), Value(-0.17667029856877559, grad=0.0)]
In [8]:
def index_to_xy(index): return index % GRID_SIZE, index // GRID_SIZE def xy_to_index(x, y): return y * GRID_SIZE + x
In [8]:
In [9]:
def get_best_index(grid, x): d = lambda y: dist(x, grid[y]) return min(list(range(GRID_SIZE * GRID_SIZE)), key=d) def get_best_xy(grid, vec): return index_to_xy(get_best_index(grid, vec))
In [16]:
# Train def train_samp(grid, features, radius): bestx, besty = get_best_xy(grid, features) bestindex = xy_to_index(bestx, besty) bestPos = np.array([bestx, besty]) loss = Value(0) for y in range(GRID_SIZE): for x in range(GRID_SIZE): index = xy_to_index(x, y) pos = np.array([x, y]) d = np.linalg.norm(bestPos - pos) dw = np.interp(d, (0, MAXDIST), (1, 0)) if d < radius: loss += dist(grid[index], features) * Value(dw) else: loss -= dist(grid[index], features) * Value(dw) return loss maxiter = 10 indices = list(range(len(Y))) for iteration in range(maxiter): random.shuffle(indices) radius = np.interp(iteration, (0, maxiter - 1), (MAXDIST, 0)) loss = Value(0) for i, index in enumerate(indices): loss += train_samp(grid, X[index], radius) print(iteration, i, round(radius, 4), round(loss.value, 4), end=" \r") for v in grid: for p in v: p.grad = 0 loss.backward() for v in grid: for p in v: p.value -= p.grad * 0.0001
Out:
9 149 0.0 -19842.8487
In [11]:
allxs = [] allys = [] for label in range(3): xs = [] ys = [] labels = [] for i, features in enumerate(X): l = Y[i] if l != label: continue x, y = get_best_xy(grid, features) xs.append(x) ys.append(y) allxs.append(x) allys.append(y) labels.append(l) _ = plt.gca().set_ylim([0, GRID_SIZE - 1]) _ = plt.gca().set_xlim([0, GRID_SIZE - 1]) _ = plt.scatter(xs, ys, c=labels) _ = plt.figure() _ = plt.gca().set_xlim([0, GRID_SIZE - 1]) _ = plt.gca().set_ylim([0, GRID_SIZE - 1]) _ = plt.scatter(allxs, allys, c=Y, alpha=0.7)
Out:
Out:
Out:
Out: