# Make sure to install plotly 
# !pip install plotly==4.14.3
import plotly.express as px
# and pandas for data 
import pandas as pd


# Load the salary dataset from a local CSV file,
salary_df = pd.read_csv('/Users/michiganboy/Documents/Data/employee_salaries.csv')
# then plot Years of Experience vs. Salary with Plotly Express.
# The figure is shown explicitly: a bare px.scatter(...) expression is only
# rendered when it happens to be the last expression of a notebook cell.
fig = px.scatter(salary_df, x='Years_of_Experience', y='Salary')
fig.show()


# Load a more advanced dataset: university admissions (university_admission)
#
# Columns as described by the course material:
#   GRE Scores (out of 340)
#   TOEFL Scores (out of 120)
#   University Rating (out of 5)
#   Statement of Purpose (SOP)
#   Letter of Recommendation (LOR) Strength (out of 5)
#   Undergraduate GPA (out of 10)
#   Research Experience (either 0 or 1)
#   Chance of admission (ranging from 0 to 1)

admission_df = pd.read_csv('/Users/michiganboy/Documents/Data/university_admission.csv')

# Every figure below is shown explicitly with .show(); a bare px.scatter(...)
# expression is only rendered when it is the last expression of a notebook cell.

# Three variables: GRE score vs. chance of admission, coloured by university rating
fig = px.scatter(admission_df, x='GRE Score', y='Chance of Admit',
                 color='University Rating')
fig.show()


# Add a fourth variable, "SOP", as the marker size (bubble chart)
fig = px.scatter(admission_df, x='GRE Score', y='Chance of Admit',
                 color='University Rating', size='SOP')
fig.show()


# Extra columns can be surfaced in the tooltip through hover_data
fig = px.scatter(admission_df, x='GRE Score', y='Chance of Admit',
                 color='University Rating', size='SOP', hover_data=['LOR'])
fig.show()


# Square SOP into a separate column (the raw SOP values stay intact)
# and size the bubbles by the squared value instead
admission_df['SOP_sq'] = admission_df['SOP'] ** 2
fig = px.scatter(admission_df, x='GRE Score', y='Chance of Admit',
                 color='University Rating', size='SOP_sq', hover_data=['LOR'])
fig.show()


# Import the crypto currency dataset
crypto_df = pd.read_csv('/Users/michiganboy/Documents/Data/crypto_prices.csv')
crypto_df


# Plot one interactive line plot per coin (Bitcoin, Ethereum, Litecoin) over time.
# A loop replaces three copy-pasted calls, and .show() renders each figure
# regardless of where the call sits inside a notebook cell.
for price_column in ('BTC-USD Price', 'ETH-USD Price', 'LTC-USD Price'):
    fig = px.line(crypto_df, x='Date', y=price_column)
    fig.show()


# Start from an empty Plotly Express line figure and add one trace per coin
fig = px.line()

# Every column after the first one is plotted against the 'Date' column
price_columns = crypto_df.columns[1:]
for column_name in price_columns:
    fig.add_scatter(
        x=crypto_df['Date'],
        y=crypto_df[column_name],
        name=column_name,
    )

fig.show()


# Print out the maximum price for each crypto
# (every column after the first one is treated as a price column).
for price_column in crypto_df.columns[1:]:
    print("Max", price_column, ":", crypto_df[price_column].max())

# Output captured from a previous run (kept as comments so the file still parses):
# Max BTC-USD Price : 61243.08594
# Max ETH-USD Price : 1977.276855
# Max LTC-USD Price : 358.3359985


# Select the row(s) where the Bitcoin price equals its maximum
crypto_df[crypto_df['BTC-USD Price'] == crypto_df['BTC-USD Price'].max()]


# Define a dictionary with all crypto allocation in a portfolio
# Note that total summation = 100%

# The coin labels are written once and reused for the index and the pie labels,
# so the two can never drift apart.
coins = ['BTC', 'ETH', 'LTC', 'XRP', 'ADA']
my_dict = {"allocation %": [20, 20, 20, 20, 20]}

# Stored under its own name: rebinding crypto_df here would replace the price
# table loaded earlier and break any price plot that is re-run afterwards.
allocation_df = pd.DataFrame(data=my_dict, index=coins)
allocation_df


# Use Plotly Express to plot a pie chart, shown explicitly with .show()
fig = px.pie(allocation_df, values="allocation %", names=coins,
             title="Allocation of Crypto")
fig.show()


# A second portfolio, heavily weighted towards XRP, drawn as a donut chart.
# The coin labels are written once and reused for the index and the pie labels.
new_coins = ['BTC', 'ETH', 'LTC', 'XRP', 'ADA']
new_dict = {"allocation %": [10, 10, 10, 60, 10]}
new_crypto_df = pd.DataFrame(data=new_dict, index=new_coins)

# hole = 0.2 cuts out the centre of the pie; .show() renders the figure
# regardless of where the call sits inside a notebook cell.
fig = px.pie(new_crypto_df, values="allocation %", names=new_coins,
             title="New Crypto Allocation", hole=0.2)
fig.show()


# Gapminder combines data from multiple sources in a time-series format.
# More about the data: https://www.gapminder.org/data/

# Option 1: read a local CSV copy of the dataset
gapminder_csv = '/Users/michiganboy/Documents/Data/gapminder.csv'
gapminder_df = pd.read_csv(gapminder_csv)
gapminder_df


# Option 2: use the copy that ships with Plotly Express
data = px.data.gapminder()
data

# Note: the iso_alpha column holds the country code.


# Keep only the rows for the country of choice
canada_df = data[data['country'] == 'Canada']
canada_df


# Bar chart of population per year. The terse 'pop' column is relabelled to the
# more descriptive "Population of Canada" and the figure height is fixed.
# .show() renders the figure even when the call is not the last expression of
# a notebook cell.
fig = px.bar(canada_df, x='year', y='pop',
             labels={'pop': "Population of Canada"},
             height=400)
fig.show()


# Same chart with a third dimension (colour = life expectancy) and GDP per
# capita added to the hover tooltip.
fig = px.bar(canada_df, x='year', y='pop',
             labels={'pop': "Population of Canada"},
             color='lifeExp',
             hover_data=['gdpPercap'],
             height=400)
fig.show()


# Repeat the bar chart for Egypt. The rows get their own variable: storing them
# in canada_df would silently replace the Canadian data under a misleading name.
egypt_df = data[data['country'] == 'Egypt']

# Population per year, coloured by life expectancy, GDP per capita on hover
fig = px.bar(egypt_df, x='year', y='pop',
             labels={'pop': "Population of Egypt"},
             color='lifeExp',
             hover_data=['gdpPercap'],
             height=400)
fig.show()


# Each job is a plain dict with a task name plus ISO-formatted start/finish dates.

# Job #1
job_1 = dict(Task='Scrape data from web', Start='2020-10-10', Finish='2021-01-01')
job_1


# Job #2
job_2 = dict(Task='Text Analysis', Start='2021-01-01', Finish='2021-03-01')
job_2


# Job #3
job_3 = dict(Task='Calibrate Models', Start='2021-03-06', Finish='2021-04-04')
job_3


# Build the project table: one row per job, one column per dict key
project_df = pd.DataFrame.from_records([job_1, job_2, job_3])
project_df


# Gantt chart: one horizontal bar per task, spanning Start to Finish.
# The y axis is reversed so the tasks read top-down in table order.
fig = px.timeline(
    project_df,
    y='Task',
    x_start='Start',
    x_end='Finish',
)
fig.update_yaxes(autorange='reversed')
fig.show()


# Add a fourth task and rebuild the project table with it
job_4 = dict(Task='Send the Models for Product Team', Start='2021-04-04', Finish='2021-04-05')
job_4

all_jobs = [job_1, job_2, job_3, job_4]
project_df = pd.DataFrame(all_jobs)

# Redraw the Gantt chart, again with the y axis reversed
fig = px.timeline(project_df, y='Task', x_start='Start', x_end='Finish')
fig.update_yaxes(autorange='reversed')
fig.show()


# A sunburst plot represents hierarchical data as sectors laid out over several levels of concentric rings

restaurant_df = pd.read_csv('/Users/michiganboy/Documents/Data/restaurant_mini.csv')
restaurant_df


# Hierarchy levels are given by `path`; sector sizes come from the 'Invoice' column.
# .show() renders the figure even when the call is not the last expression of
# a notebook cell.
fig = px.sunburst(restaurant_df, path=['Dining or Takeout', 'Day', 'Age'], values='Invoice')
fig.show()


# Load the full restaurant dataset (this rebinds restaurant_df)
restaurant_df = pd.read_csv('/Users/michiganboy/Documents/Data/restaurant.csv')
restaurant_df


# Sunburst over day -> time -> sex, with sectors sized by 'total_bill'.
# .show() renders the figure even when the call is not the last expression of
# a notebook cell.
fig = px.sunburst(restaurant_df, path=['day', 'time', 'sex'], values='total_bill')
fig.show()


# Plot scatterplot for two variables only using Plotly Express
fig = px.scatter(admission_df, x="GRE Score", y="Chance of Admit")
fig.show()


# Let's add a third variable "University Rating" as a color
fig = px.scatter(admission_df, x="GRE Score", y="Chance of Admit", color="University Rating")
fig.show()


# Let's square the SOP column.
# The result goes into its own column: overwriting 'SOP' in place would square
# the values again on every re-run and would plot squared numbers under the
# raw column's name.
admission_df['SOP_sq'] = admission_df['SOP'] ** 2

# Let's add a fourth variable, the squared SOP, as the size
fig = px.scatter(admission_df, x="GRE Score", y="Chance of Admit",
                 color="University Rating", size='SOP_sq')
fig.show()


# Reload the crypto currency price table
crypto_df = pd.read_csv('crypto_prices.csv')
crypto_df


# One interactive line plot per coin: Litecoin first, then Ethereum
for coin_column in ('LTC-USD Price', 'ETH-USD Price'):
    fig = px.line(crypto_df, x='Date', y=coin_column)
    fig.show()

# Peaks noted from the plots:
#   Date = 12/18/2017, Max LTC Price = $358.3
#   Date = 03/13/2021, Max BTC Price = $61,000
#   Date = 04/01/2021, Max ETH Price = $1977.27


# Prices noted for the day Bitcoin peaked:
#   BTC price = $61,243
#   LTC price = $226
#   ETH price = 1,924

# Row(s) where Bitcoin is at its maximum
btc_prices = crypto_df['BTC-USD Price']
crypto_df[btc_prices == btc_prices.max()]

# All prices on that date
on_peak_day = crypto_df['Date'] == '3/13/2021'
crypto_df[on_peak_day]


# Portfolio weights in percent, one entry per coin
my_dict = dict([('allocation %', [10, 10, 10, 60, 10])])

crypto_df = pd.DataFrame(
    data=my_dict,
    index=['BTC', 'ETH', 'LTC', 'XRP', 'ADA'],
)

# Donut-style pie chart of the allocation (hole = 0.3)
fig = px.pie(
    crypto_df,
    names=['BTC', 'ETH', 'LTC', 'XRP', 'ADA'],
    values='allocation %',
    hole=0.3,
    title='Crypto Portfolio Allocation',
)
fig.show()


# Filter the data based on the country of choice
egypt_df = data[data['country'] == 'Egypt']
egypt_df

# Population per year, coloured by life expectancy, with life expectancy and
# GDP per capita in the hover tooltip. The 'pop' axis label names Egypt, the
# country actually plotted (it previously read "population of Canada").
fig = px.bar(egypt_df, x='year', y='pop', color='lifeExp',
             hover_data=['lifeExp', 'gdpPercap'],
             labels={'pop': 'Population of Egypt'},
             height=500)
fig.show()


# Job #4: a one-day task appended to the three existing jobs
job_4 = dict(Task="Send the course for approval", Start='2021-04-04', Finish='2021-04-05')
job_4

project_jobs = [job_1, job_2, job_3, job_4]
project_df = pd.DataFrame(project_jobs)
project_df

# Gantt chart of all four jobs
fig = px.timeline(project_df, y="Task", x_start="Start", x_end="Finish")
# Reverse the y axis, otherwise tasks are listed from the bottom up
fig.update_yaxes(autorange="reversed")
fig.show()


# Load the restaurant dataset from the working directory
restaurant_df = pd.read_csv('restaurant.csv')
restaurant_df

# Sunburst over day -> time -> sex, with sectors sized by the bill total
hierarchy = ['day', 'time', 'sex']
fig = px.sunburst(restaurant_df, values='total_bill', path=hierarchy)
fig.show()

	Date	BTC-USD Price	ETH-USD Price	LTC-USD Price
0	9/17/2014	457.334015	NaN	5.058550
1	9/18/2014	424.440002	NaN	4.685230
2	9/19/2014	394.795990	NaN	4.327770
3	9/20/2014	408.903992	NaN	4.286440
4	9/21/2014	398.821014	NaN	4.245920
...	...	...	...	...
2380	3/28/2021	55950.746090	1691.355957	185.028488
2381	3/29/2021	57750.199220	1819.684937	194.474777
2382	3/30/2021	58917.691410	1846.033691	196.682098
2383	3/31/2021	58918.832030	1918.362061	197.499100
2384	4/1/2021	59095.808590	1977.276855	204.112518

	Unnamed: 0	country	continent	year	lifeExp	pop	gdpPercap	iso_alpha	iso_num
0	0	Afghanistan	Asia	1952	28.801	8425333	779.445314	AFG	4
1	1	Afghanistan	Asia	1957	30.332	9240934	820.853030	AFG	4
2	2	Afghanistan	Asia	1962	31.997	10267083	853.100710	AFG	4
3	3	Afghanistan	Asia	1967	34.020	11537966	836.197138	AFG	4
4	4	Afghanistan	Asia	1972	36.088	13079460	739.981106	AFG	4
...	...	...	...	...	...	...	...	...	...
1699	1699	Zimbabwe	Africa	1987	62.351	9216418	706.157306	ZWE	716
1700	1700	Zimbabwe	Africa	1992	60.377	10704340	693.420786	ZWE	716
1701	1701	Zimbabwe	Africa	1997	46.809	11404948	792.449960	ZWE	716
1702	1702	Zimbabwe	Africa	2002	39.989	11926563	672.038623	ZWE	716
1703	1703	Zimbabwe	Africa	2007	43.487	12311143	469.709298	ZWE	716

	country	continent	year	lifeExp	pop	gdpPercap	iso_alpha	iso_num
0	Afghanistan	Asia	1952	28.801	8425333	779.445314	AFG	4
1	Afghanistan	Asia	1957	30.332	9240934	820.853030	AFG	4
2	Afghanistan	Asia	1962	31.997	10267083	853.100710	AFG	4
3	Afghanistan	Asia	1967	34.020	11537966	836.197138	AFG	4
4	Afghanistan	Asia	1972	36.088	13079460	739.981106	AFG	4
...	...	...	...	...	...	...	...	...
1699	Zimbabwe	Africa	1987	62.351	9216418	706.157306	ZWE	716
1700	Zimbabwe	Africa	1992	60.377	10704340	693.420786	ZWE	716
1701	Zimbabwe	Africa	1997	46.809	11404948	792.449960	ZWE	716
1702	Zimbabwe	Africa	2002	39.989	11926563	672.038623	ZWE	716
1703	Zimbabwe	Africa	2007	43.487	12311143	469.709298	ZWE	716

	country	continent	year	lifeExp	pop	gdpPercap	iso_alpha	iso_num
240	Canada	Americas	1952	68.750	14785584	11367.16112	CAN	124
241	Canada	Americas	1957	69.960	17010154	12489.95006	CAN	124
242	Canada	Americas	1962	71.300	18985849	13462.48555	CAN	124
243	Canada	Americas	1967	72.130	20819767	16076.58803	CAN	124
244	Canada	Americas	1972	72.880	22284500	18970.57086	CAN	124
245	Canada	Americas	1977	74.210	23796400	22090.88306	CAN	124
246	Canada	Americas	1982	75.760	25201900	22898.79214	CAN	124
247	Canada	Americas	1987	76.860	26549700	26626.51503	CAN	124
248	Canada	Americas	1992	77.950	28523502	26342.88426	CAN	124
249	Canada	Americas	1997	78.610	30305843	28954.92589	CAN	124
250	Canada	Americas	2002	79.770	31902268	33328.96507	CAN	124
251	Canada	Americas	2007	80.653	33390141	36319.23501	CAN	124

	Unnamed: 0	total_bill	tip	sex	smoker	day	time	size
0	0	16.99	1.01	Female	No	Sun	Dinner	2
1	1	10.34	1.66	Male	No	Sun	Dinner	3
2	2	21.01	3.50	Male	No	Sun	Dinner	3
3	3	23.68	3.31	Male	No	Sun	Dinner	2
4	4	24.59	3.61	Female	No	Sun	Dinner	4
...	...	...	...	...	...	...	...	...
239	239	29.03	5.92	Male	No	Sat	Dinner	3
240	240	27.18	2.00	Female	Yes	Sat	Dinner	2
241	241	22.67	2.00	Male	Yes	Sat	Dinner	2
242	242	17.82	1.75	Male	No	Sat	Dinner	2
243	243	18.78	3.00	Female	No	Thur	Dinner	2

Plotly Express

TASK #2: PLOT INTERACTIVE BUBBLE CHART (SCATTERPLOT WITH SIZE)

TASK #3: PLOT INTERACTIVE SINGLE LINEPLOT USING PLOTLY EXPRESS

TASK #4: PLOT INTERACTIVE MULTIPLE LINE PLOTS USING PLOTLY EXPRESS

TASK #5: PLOT INTERACTIVE PIE CHARTS

TASK #6: PLOT INTERACTIVE BAR CHART

TASK #7: PLOT INTERACTIVE GANTT CHART

TASK #8: PLOT INTERACTIVE SUNBURST

MINI CHALLENGE SOLUTIONS

GREAT JOB!

	Task	Start	Finish
0	Scrape data from web	2020-10-10	2021-01-01
1	Text Analysis	2021-01-01	2021-03-01
2	Calibrate Models	2021-03-06	2021-04-04

	Customer ID	Day	Dining or Takeout	Age	Invoice
0	1	Saturday	Dining	23	45
1	2	Saturday	Dining	22	70
2	3	Sunday	Takeout	26	80
3	4	Sunday	Takeout	30	100