Normal Test


Reading time: about 10 minutes
In [1]:
get_ipython().ast_node_interactivity = 'all'
import matplotlib.pyplot as plt
import numpy as np
In [2]:
vals = np.random.normal(0, 1, 10_000_000)

np.mean(vals), np.std(vals)
Out [2]:
(0.00010162346483672586, 0.9999363897125003)
In [3]:
# PDF

hist = np.histogram(vals, bins=100)

Xs = hist[1][:-1]
Ys = hist[0]

plt.plot(hist[1][:-1], hist[0])
Out [3]:
[]
Out:
<Figure size 640x480 with 1 Axes>
In [4]:
# Approximate the PDF with a polynomial

pdf_poly = np.polyfit(Xs, Ys, 20)

pdf = lambda x: np.polyval(pdf_poly, x)

plt.plot(Xs, pdf(Xs))
Out [4]:
[]
Out:
<Figure size 640x480 with 1 Axes>
In [5]:
# CDF
cdf_vals = []
s = 0.0

for x in Xs:
    s += pdf(x)
    cdf_vals.append(s)

cdf_vals = np.array(cdf_vals) / s

plt.plot(Xs, cdf_vals)
Out [5]:
[]
Out:
<Figure size 640x480 with 1 Axes>
In [17]:
# Approximate the CDF with a polynomial

cdf_poly = np.polyfit(Xs, cdf_vals, 20)

cdf = lambda x: np.polyval(cdf_poly, x)

plt.plot(Xs, cdf(Xs))
Out [17]:
[]
Out:
<Figure size 640x480 with 1 Axes>
In [48]:
# Approximate the inverse CDF with a polynomial
y = cdf(Xs)

# Learn the inverse function
import sklearn.neural_network
from sklearn.preprocessing import MinMaxScaler

inputscaler = MinMaxScaler()
outputscaler = MinMaxScaler()

m = sklearn.neural_network.MLPRegressor(hidden_layer_sizes=(100, 100, 100), max_iter=1000000, activation='logistic')

m.fit(inputscaler.fit_transform(y.reshape(-1, 1)), outputscaler.fit_transform(Xs.reshape(-1, 1)))

def inv_cdf(x):
    inp = inputscaler.transform(x.reshape(-1, 1))
    res = m.predict(inp).reshape(-1, 1)
    return outputscaler.inverse_transform(res)

plt.plot(cdf(Xs), Xs)

xs = np.linspace(0, 1, 500)

_ = plt.plot(xs, inv_cdf(xs))
Out:
/usr/lib/python3.11/site-packages/sklearn/neural_network/_multilayer_perceptron.py:1624: DataConversionWarning: A column-vector y was passed when a 1d array was expected. Please change the shape of y to (n_samples, ), for example using ravel().
  y = column_or_1d(y, warn=True)
Out [48]:
MLPRegressor(activation='logistic', hidden_layer_sizes=(100, 100, 100),
             max_iter=1000000)
In a Jupyter environment, please rerun this cell to show the HTML representation or trust the notebook.
On GitHub, the HTML representation is unable to render, please try loading this page with nbviewer.org.
Out [48]:
[]
Out:
<Figure size 640x480 with 1 Axes>
In [45]:
# Sample from the distribution
xs = np.random.uniform(0, 1, 5000)
samples = inv_cdf(xs)

_ = plt.hist(samples, bins=100)
Out:
<Figure size 640x480 with 1 Axes>

The following pages link here

Citation

If you find this work useful, please cite it as:
@article{yaltirakliwikinormaltest,
  title   = "Normal Test",
  author  = "Yaltirakli, Gokberk",
  journal = "gkbrk.com",
  year    = "2024",
  url     = "https://www.gkbrk.com/wiki/normal-test/"
}
Not using BibTeX? Click here for more citation styles.
IEEE Citation
Gokberk Yaltirakli, "Normal Test", December, 2024. [Online]. Available: https://www.gkbrk.com/wiki/normal-test/. [Accessed Dec. 30, 2024].
APA Style
Yaltirakli, G. (2024, December 30). Normal Test. https://www.gkbrk.com/wiki/normal-test/
Bluebook Style
Gokberk Yaltirakli, Normal Test, GKBRK.COM (Dec. 30, 2024), https://www.gkbrk.com/wiki/normal-test/

Comments

© 2024 Gokberk Yaltirakli