In [1]:
get_ipython().ast_node_interactivity = 'all' import matplotlib.pyplot as plt import numpy as np
In [2]:
vals = np.random.normal(0, 1, 10_000_000) np.mean(vals), np.std(vals)
Out [2]:
(0.00010162346483672586, 0.9999363897125003)
In [3]:
# PDF hist = np.histogram(vals, bins=100) Xs = hist[1][:-1] Ys = hist[0] plt.plot(hist[1][:-1], hist[0])
Out [3]:
[]
Out:
In [4]:
# Approximate the PDF with a polynomial pdf_poly = np.polyfit(Xs, Ys, 20) pdf = lambda x: np.polyval(pdf_poly, x) plt.plot(Xs, pdf(Xs))
Out [4]:
[]
Out:
In [5]:
# CDF cdf_vals = [] s = 0.0 for x in Xs: s += pdf(x) cdf_vals.append(s) cdf_vals = np.array(cdf_vals) / s plt.plot(Xs, cdf_vals)
Out [5]:
[]
Out:
In [17]:
# Approximate the CDF with a polynomial cdf_poly = np.polyfit(Xs, cdf_vals, 20) cdf = lambda x: np.polyval(cdf_poly, x) plt.plot(Xs, cdf(Xs))
Out [17]:
[]
Out:
In [48]:
# Approximate the inverse CDF with a polynomial y = cdf(Xs) # Learn the inverse function import sklearn.neural_network from sklearn.preprocessing import MinMaxScaler inputscaler = MinMaxScaler() outputscaler = MinMaxScaler() m = sklearn.neural_network.MLPRegressor(hidden_layer_sizes=(100, 100, 100), max_iter=1000000, activation='logistic') m.fit(inputscaler.fit_transform(y.reshape(-1, 1)), outputscaler.fit_transform(Xs.reshape(-1, 1))) def inv_cdf(x): inp = inputscaler.transform(x.reshape(-1, 1)) res = m.predict(inp).reshape(-1, 1) return outputscaler.inverse_transform(res) plt.plot(cdf(Xs), Xs) xs = np.linspace(0, 1, 500) _ = plt.plot(xs, inv_cdf(xs))
Out:
/usr/lib/python3.11/site-packages/sklearn/neural_network/_multilayer_perceptron.py:1624: DataConversionWarning: A column-vector y was passed when a 1d array was expected. Please change the shape of y to (n_samples, ), for example using ravel(). y = column_or_1d(y, warn=True)
Out [48]:
MLPRegressor(activation='logistic', hidden_layer_sizes=(100, 100, 100), max_iter=1000000)In a Jupyter environment, please rerun this cell to show the HTML representation or trust the notebook.
On GitHub, the HTML representation is unable to render, please try loading this page with nbviewer.org.MLPRegressor(activation='logistic', hidden_layer_sizes=(100, 100, 100), max_iter=1000000)
Out [48]:
[]
Out:
In [45]:
# Sample from the distribution xs = np.random.uniform(0, 1, 5000) samples = inv_cdf(xs) _ = plt.hist(samples, bins=100)
Out: