The correlation between variables is one of the most important tools we have for uncovering patterns in data. Although correlation theory originated in classical statistics, it is also a key part of data exploration and feature engineering in machine learning. This section first introduces the basic theory of correlation between continuous variables, then builds on it to discuss how patterns are captured and how they can be created, in preparation for building a data generator in the next section.
# Scientific computing modules
import numpy as np
import pandas as pd
# Plotting modules
import matplotlib as mpl
import matplotlib.pyplot as plt
What machine learning actually "learns" are the numerical patterns hidden in a dataset. Because these patterns reflect the real attributes or operating state of whatever the data describes, they are valuable for guiding real-world decisions, and this is the fundamental source of the value of machine learning algorithms.
Of course, datasets differ in whether they contain any patterns at all and in how deeply those patterns are hidden, and models differ in which kinds of patterns they are good at uncovering and how strong that ability is. What linear regression captures is the linear correlation in a dataset. Linear correlation, put simply, means that variables change in step with one another. Take the dataset used earlier:
| Whole weight | Rings |
| --- | --- |
| 1 | 2 |
| 3 | 4 |
The feature and the label clearly change in step: from the first sample to the second, the feature increases by 2 and the label also increases by 2. This is also where the interpretability of the linear model comes from: heavier abalones are older, and for every increase of 2 in weight the age increases by 2. Described in more formal terms, this synchronized change is the correlation between the variables, and it can be quantified with the correlation coefficient:

$$Corr(X, Y) = \frac{Cov(X, Y)}{\sqrt{Var(X)} \cdot \sqrt{Var(Y)}}$$
where $X$ and $Y$ are two random variables (two columns of the dataset), $Var(X)$ and $Var(Y)$ are the variances of $X$ and $Y$, and $Cov(X, Y)$ is the covariance of $X$ and $Y$, computed as:

$$Cov(X, Y) = E\big[(X - E(X))(Y - E(Y))\big]$$
where $E(X)$ and $E(Y)$ are the expectations of $X$ and $Y$.
There are many ways to compute a correlation coefficient. The one introduced here is known as the Pearson correlation coefficient, first proposed by the statistician Karl Pearson, and it is the most widely used correlation measure today.
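To make the formula concrete, here is a quick manual check on the two columns of the small table above (a sketch with my own variable names; population-style averaging is used here, though the result is the same with sample averaging since the factors cancel):
# Manual Pearson correlation for the two columns of the table above
wx = np.array([1, 3])                                  # Whole weight
ry = np.array([2, 4])                                  # Rings
cov_xy = ((wx - wx.mean()) * (ry - ry.mean())).mean()  # covariance, population form
cov_xy / (wx.std() * ry.std())                         # correlation coefficient
1.0
The result of 1.0 reflects the perfect lockstep change visible in the table.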
NumPy also provides the function corrcoef, which can be used to quickly compute the correlation coefficient between two arrays:
A = np.array([[1, 2, 3], [4, 5, 10]]).T
A
array([[ 1, 4], [ 2, 5], [ 3, 10]])
A[:, 0]
array([1, 2, 3])
np.corrcoef(A[:, 0], A[:, 1])
array([[1. , 0.93325653], [0.93325653, 1. ]])
The function returns the correlation matrix $A_{2 \times 2}$, where $a_{i,j}$ is the correlation coefficient between the $i$-th and $j$-th variables. As expected, the correlation matrix has ones on its diagonal and is symmetric: corresponding elements of the upper and lower triangles are equal. For the two columns of A, the correlation coefficient is about 0.933.
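Since pandas has already been imported, the same matrix can also be obtained through a DataFrame, which is convenient when a dataset has many columns (a small sketch; the column names are made up for illustration):
# Equivalent computation with pandas (Pearson correlation by default)
pd.DataFrame(A, columns=['x1', 'x2']).corr()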
The correlation coefficient always lies within $[-1, 1]$. A negative value means the two variables move in opposite directions: as one increases, the other decreases. For example:
A = np.array([[1, 2, 3], [-1, -1.5, -5]]).T
A
array([[ 1. , -1. ], [ 2. , -1.5], [ 3. , -5. ]])
plt.plot(A[:, 0], A[:, 1])
np.corrcoef(A[:, 0], A[:, 1])
array([[ 1. , -0.91766294], [-0.91766294, 1. ]])
In general, the larger the absolute value of the correlation coefficient, the stronger the relationship between the two variables. An absolute value of 1 means complete correlation: the two variables change in perfect linear lockstep, and either one can be expressed as a linear function of the other. An absolute value of 0 means no linear correlation at all: the two variables show no linear co-movement. For absolute values between 0 and 1, the strength of the correlation can be described roughly as follows:
| \|Corr\| | Correlation strength |
| --- | --- |
| 0 ~ 0.09 | No correlation |
| 0.1 ~ 0.3 | Weak correlation |
| 0.3 ~ 0.5 | Moderate correlation |
| 0.5 ~ 1.0 | Strong correlation |
For a pair of variables, we can also illustrate different degrees of correlation with a functional relationship and its plot.
np.random.randn(20)
array([-1.64976142, -0.87343737, 0.07530987, -1.42079571, -0.83262953, 1.21936676, -0.75871775, 0.44775161, 0.46307329, 1.44154581, 0.79686385, -1.50887509, -0.53100092, 2.41405101, -0.28564285, -1.51317621, -0.90461468, -0.45806723, 1.0310925 , -0.58551109])
X = np.random.randn(20)
y = X + 1
Clearly, X and y are now perfectly positively correlated:
np.corrcoef(X, y)
array([[1., 1.], [1., 1.]])
# corresponding scatter plot
plt.plot(X, y, 'o')
If we want to build, on top of y, an array whose linear relationship with X is somewhat weaker, we can add a random number to y as a disturbance term. For example:
a = y.shape
a
(20,)
np.random.normal?
Docstring (abridged): normal(loc=0.0, scale=1.0, size=None) — draw random samples from a normal (Gaussian) distribution, where loc is the mean, scale is the standard deviation, and size is the output shape.
# Create a standard-normal random array with the same shape as y
np.random.normal(size=a)
array([ 0.49622906, 1.3573347 , -0.20178063, 0.87805077, -1.42474422, -1.70856044, -1.0952294 , -0.58293826, 1.09455328, -0.68583135, -0.64713056, 0.26123903, -0.47562764, 1.39130696, 0.6881981 , 0.30883974, -0.19414512, 1.6188312 , -2.05761665, 0.14654045])
ran = np.random.normal(size=X.shape)
ran
array([ 0.26042618, -1.04154116, -0.08313493, -0.79742972, -0.13280839, 1.27921862, 0.48826155, -0.60279756, 0.60330237, -0.71903143, 0.2286587 , 1.9293763 , 2.45620622, 0.78343275, -0.37187501, 0.91938857, 1.79980253, -0.45157682, -0.37647247, 1.03357355])
Next, create a variable delta that controls the size of the disturbance term:
delta = 0.5
The disturbance term is then computed as:
r = ran * delta
r
array([ 0.13021309, -0.52077058, -0.04156746, -0.39871486, -0.0664042 , 0.63960931, 0.24413077, -0.30139878, 0.30165118, -0.35951571, 0.11432935, 0.96468815, 1.22810311, 0.39171637, -0.18593751, 0.45969429, 0.89990127, -0.22578841, -0.18823623, 0.51678678])
y1 = y + r
y1
array([ 0.16849765, 1.33677569, 2.94351355, 1.068285 , 1.27528793, 0.41509024, -1.33128073, 1.71470201, 0.40760464, 0.84310426, 1.86461507, -0.33425138, 1.99110458, 1.67303745, 0.06872278, 0.07684552, 0.20245359, 0.7843353 , 1.5186445 , 2.23064098])
Here, y1 is the label obtained by adding the disturbance term to y. Because of this random disturbance, the linear relationship between y1 and X is weakened.
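We can quantify the weakening directly; the exact value will vary from run to run, since no random seed has been fixed:
np.corrcoef(X, y1)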
More fundamentally, adding a disturbance term weakens the linear correlation because the disturbance itself contains no pattern: mixed into the data, it can only obscure the original pattern. A disturbance of this kind is also called white noise. White noise has a practical interpretation as well: in many cases, the data we collect already carries some random error, or information that cannot help us extract any useful pattern.
plt.subplot(121)
plt.plot(X, y, 'o')
plt.title('y=x+1')
plt.subplot(122)
plt.plot(X, y1, 'o')
plt.title('y=x+1+r')
(Figure: two scatter plots side by side, the left titled 'y=x+1' and the right titled 'y=x+1+r'.)
As the plots show, the linear correlation becomes visibly weaker once the disturbance term is added. Naturally, as delta grows, the noise values become larger in absolute terms and obscure the original linear pattern even more. We can use a set of plots to show how the two variables are distributed at different levels of correlation:
# Values of the delta coefficient
dl = [0.5, 0.7, 1, 1.5, 2, 5]
# Empty list containers
yl = []  # y values computed for each delta
cl = []  # correlation matrices, one per y
# Compute y and its correlation with X for each delta
for i in dl:
    yn = X + 1 + (ran * i)
    cl.append(np.corrcoef(X, yn))
    yl.append(yn)
cl
[array([[1. , 0.9367437], [0.9367437, 1. ]]), array([[1. , 0.8911804], [0.8911804, 1. ]]), array([[1. , 0.81961547], [0.81961547, 1. ]]), array([[1. , 0.71248276], [0.71248276, 1. ]]), array([[1. , 0.62837293], [0.62837293, 1. ]]), array([[1. , 0.39817207], [0.39817207, 1. ]])]
yl
[array([ 1.627525 , -0.88382241, 1.86933795, -0.02300046, 0.73433085, -0.33109922, 1.33691722, 2.86218277, 0.41444553, 1.9081874 , -1.15456573, -1.10209687, 0.22127724, 1.08855797, 2.65327611, 0.72665134, 1.91516867, 0.14535214, -0.00467952, 2.90418153]), array([ 1.83074703, -0.91569175, 2.18530488, -0.21976011, 0.71340074, -0.63292082, 1.10134436, 3.28833318, 0.34776874, 1.78608826, -1.14334884, -1.22150795, 0.02903983, 1.2104614 , 2.61838561, 0.76623911, 1.93033199, 0.01149336, 0.01992074, 2.97614544]), array([ 2.13558008, -0.96349575, 2.65925528, -0.5148996 , 0.68200557, -1.08565322, 0.74798509, 3.92755879, 0.24775356, 1.60293956, -1.12652351, -1.40062457, -0.25931629, 1.39331655, 2.56604986, 0.82562076, 1.95307696, -0.18929479, 0.05682113, 3.0840913 ]), array([ 2.64363515, -1.04316909, 3.4491726 , -1.00679874, 0.62968028, -1.84020721, 0.15905296, 4.99293481, 0.08106159, 1.29769172, -1.0984813 , -1.69915227, -0.73990982, 1.69807513, 2.47882362, 0.92459017, 1.99098525, -0.52394172, 0.11832178, 3.26400107]), array([ 3.15169023, -1.12284243, 4.23908993, -1.49869788, 0.577355 , -2.59476121, -0.42987917, 6.05831082, -0.08563037, 0.99244388, -1.07043908, -1.99767997, -1.22050335, 2.00283371, 2.39159737, 1.02355959, 2.02889354, -0.85858865, 0.17982242, 3.44391085]), array([ 6.20002067, -1.60088248, 8.9785939 , -4.45009273, 0.26340331, -7.12208519, -3.96347196, 12.45056692, -1.08578218, -0.83904317, -0.90218579, -3.78884616, -4.10406454, 3.83138519, 1.86823989, 1.6173761 , 2.25634329, -2.86647023, 0.54882631, 4.52336949])]
plt.plot(X, yl[0], 'o')
# Plot each (X, yl[i]) scatter together with the noise-free line y = x + 1 in a 2x3 grid
for i in range(6):
    plt.subplot(2, 3, i + 1)
    plt.plot(X, yl[i], 'o')
    plt.plot(X, y, 'r-')
It is clear that as delta grows, the correlation between the two variables becomes weaker and weaker. We can also inspect the correlation coefficient for each pair by looking at cl:
cl
[array([[1. , 0.9367437], [0.9367437, 1. ]]), array([[1. , 0.8911804], [0.8911804, 1. ]]), array([[1. , 0.81961547], [0.81961547, 1. ]]), array([[1. , 0.71248276], [0.71248276, 1. ]]), array([[1. , 0.62837293], [0.62837293, 1. ]]), array([[1. , 0.39817207], [0.39817207, 1. ]])]
As the supplementary material for Lesson 2 explained, simple linear regression tries to capture the points scattered in a two-dimensional plane with a single straight line. The weaker the linear correlation, the harder it is for the linear model to capture all of those points and the worse the model performs. Put the other way around, the more pronounced the linear correlation in a dataset, the more evident its pattern and the easier it is for the model to capture.
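An informal way to check this on the data generated above (a sketch; np.polyfit is used here simply as a convenient least-squares line fitter, not as the modeling approach of this course):
# Fit a least-squares line to the least-noisy and most-noisy datasets and compare fit error
for i in [0, 5]:
    w, b = np.polyfit(X, yl[i], deg=1)          # slope and intercept of the fitted line
    sse = ((yl[i] - (w * X + b)) ** 2).sum()    # residual sum of squares
    print(f'delta={dl[i]}, slope={w:.3f}, intercept={b:.3f}, SSE={sse:.3f}')
The residual error will generally come out far larger for delta = 5 than for delta = 0.5, matching what the scatter plots suggest.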
On top of this basic theory, two practical applications follow:
First, before building a linear model we can examine the linear correlation of the data itself. If the features and the target are strongly correlated, a linear regression model is very likely to fit the data well. If the linear correlation is weak, the data as it stands is not well suited to linear regression: either we switch to a different model, or we apply transformations that do not distort the underlying pattern so that the data shows a more linear trend, for example by deriving a new feature column from some discovered regularity (a small sketch of such a transformation follows this list). Switching models is a discussion to revisit once we have covered more model types, while reworking the data belongs to feature engineering;
Second, the several datasets above represent modeling tasks of different difficulty for linear regression: the larger the delta, the harder the task. Based on this, we can write a function that manually creates datasets of varying difficulty (patterns hidden more or less deeply), to help us test models and the performance of optimization methods.
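As promised in the first point above, here is a minimal sketch of a "pattern-preserving" transformation; the example and variable names are purely illustrative. If y grows exponentially with x, the linear correlation between x and y is only moderate, but between x and log(y) it is exact:
# Illustrative transformation: a log transform recovers an exact linear relationship
x_demo = np.linspace(1, 5, 20)
y_demo = np.exp(x_demo)
print(np.corrcoef(x_demo, y_demo)[0, 1])            # noticeably below 1
print(np.corrcoef(x_demo, np.log(y_demo))[0, 1])    # essentially 1 (up to floating point)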
Of course, weakened linear correlation makes modeling harder not only for linear regression but for many other regression problems as well.
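To close, and to prepare for the next section, here is a minimal sketch of what such a data generator might look like; the function name, signature, and defaults are assumptions of mine rather than the version that will actually be built:
# A minimal sketch of a regression-dataset generator (hypothetical name and signature)
def generate_regression_data(num_examples=1000, w=None, delta=0.1):
    """Create a linear-regression dataset whose difficulty is controlled by delta.

    w     : true coefficients, with the last entry treated as the intercept (assumed convention)
    delta : scale of the white-noise disturbance added to the labels
    """
    if w is None:
        w = [2, -1, 1]
    w = np.array(w, dtype=float)
    num_features = len(w) - 1
    features = np.random.randn(num_examples, num_features)              # random features
    labels_true = features @ w[:-1] + w[-1]                             # noise-free labels
    labels = labels_true + np.random.normal(size=num_examples) * delta  # add the disturbance
    return features, labels

# Example: a larger delta yields a dataset with weaker linear correlation
features, labels = generate_regression_data(num_examples=200, delta=2)
np.corrcoef(features[:, 0], labels)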