# Generate fake data

import numpy as np
import pandas as pd
import seaborn as sns
import scipy.stats as stats


# You might be interested in age groups in a city.
# Simulated ages for two groups of 500 people each: the "young" group is drawn
# from a normal distribution with mean 25 and the "senior" group from one with
# mean 55; both use a standard deviation of 5.
young = stats.norm.rvs(loc=25, scale=5, size=500)
senior = stats.norm.rvs(loc=55, scale=5, size=500)

# Evenly spaced grid 0, 1, ..., 100 (101 points).
x = np.linspace(start=0, stop=100, num=101)

# Single 1000-element age sample: the young draws first, then the senior draws.
final = np.concatenate((young, senior))
# Kernel density estimation of the combined age sample (young and senior draws together).
sns.kdeplot(final)

# The probability density peaks at two locations - around 25 and 55. This indicates that the sample is
# bimodal, so a mixture of two normal distributions describes it better than a single normal distribution.

<AxesSubplot:ylabel='Density'>


# Simulated IQ scores, 500 per group: mean 105 / standard deviation 10 for the
# young group and mean 107 / standard deviation 13 for the senior group.
iq_young = stats.norm.rvs(loc=105, scale=10, size=500)
iq_senior = stats.norm.rvs(loc=107, scale=13, size=500)

# Single 1000-element IQ sample: the young draws first, then the senior draws.
iq_final = np.concatenate((iq_young, iq_senior))

# Kernel density estimation of the combined IQ sample (both age groups), rather
# than the senior subgroup alone, so the figure shows the overall IQ distribution.
sns.kdeplot(iq_final)

# The peak of the IQ distribution occurs at around 105, with the probability density tapering off on both
# sides quite symmetrically.

<AxesSubplot:ylabel='Density'>


# Inspect the container type of the combined IQ sample.
type(iq_final)

numpy.ndarray


# Put the age and IQ samples side by side in one table; row i pairs final[i]
# with iq_final[i].
data = pd.DataFrame({"Age": final, "IQ": iq_final})
data


# Rule: people older than 40 are considered senior (Senior = 1); everyone else
# is considered young (Senior = 0). Start by marking every row as young.
data['Senior'] = 0


# Then assign 1 to the 'Senior' column for the rows whose age is above 40.
# This is done with one .loc[row_mask, column] assignment on the DataFrame
# itself. The chained form data['Senior'][mask] = 1 may write to a temporary
# copy (it raises SettingWithCopyWarning) and leaves `data` unchanged when
# pandas Copy-on-Write is enabled.
data.loc[data['Age'] > 40, 'Senior'] = 1

# Now people who are above 40 will be coded as 1.

<ipython-input-33-cf4132d92236>:2: SettingWithCopyWarning: 
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  data['Senior'][data['Age'] > 40] = 1


# Display the table again, now with the 'Senior' column added.
data


# One IQ density curve per value of 'Senior', with both columns looked up in
# `data` by name.
sns.kdeplot(data=data, x='IQ', hue='Senior')

<AxesSubplot:xlabel='IQ', ylabel='Density'>

	Age	IQ
0	21.399652	100.936664
1	25.858700	103.811431
2	23.789812	100.802383
3	24.131691	87.388801
4	20.056220	101.292560
...	...	...
995	59.563507	112.911657
996	58.604462	96.105767
997	54.309280	94.484930
998	52.706558	121.984776
999	56.970346	108.810030

	Age	IQ	Senior
0	21.399652	100.936664	0
1	25.858700	103.811431	0
2	23.789812	100.802383	0
3	24.131691	87.388801	0
4	20.056220	101.292560	0
...	...	...	...
995	59.563507	112.911657	1
996	58.604462	96.105767	1
997	54.309280	94.484930	1
998	52.706558	121.984776	1
999	56.970346	108.810030	1