%%html
<!-- Notebook styling: enlarge the code-editor font and prefer JetBrains Mono. -->
<style type='text/css'>
.CodeMirror{
    font-size: 15px;
    /* Generic fallback keeps code monospaced when JetBrains Mono is not installed. */
    font-family: 'JetBrains Mono', monospace;
}

</style>


import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
import string
import warnings


# Load the two CSV datasets used by the analysis below.
main_music = pd.read_csv('working_data/biGmusic.csv')
# The top-500 file is decoded as Latin-1 instead of the default encoding.
top500 = pd.read_csv('working_data/top500songs.csv', encoding = 'latin-1')
# Apply the 'fivethirtyeight' matplotlib style to the figures that follow.
plt.style.use('fivethirtyeight')
# IPython magic (not plain Python): render figures inline in the notebook.
%matplotlib inline
# Use the 'serial' column as the row index of the main dataset.
main_music.set_index('serial', inplace = True)
# Silence all warnings from this point on.
warnings.filterwarnings('ignore')


# Correlation matrix of the columns from 'violence' through 'romantic',
# leaving out the 'topic' column that sits inside that slice.
heatmap = main_music.loc[:, 'violence':'romantic'].drop(columns = 'topic')
heatmatrix = heatmap.corr()

plt.figure(figsize = (8, 8))
sns.heatmap(data = heatmatrix, cmap = 'Greens', annot = True)
plt.show()


# Mean 'romantic' rating of the songs released in each year.
years_rom = main_music['romantic'].groupby(main_music['released']).mean()

_rom_line = dict(color = '#4ABAFF', marker = 'H', linewidth = '2')

plt.figure(figsize = (15, 5))
plt.plot(years_rom.index, years_rom, **_rom_line)
plt.title('Romanticness in songs (1950-2019)')
plt.ylabel('Romantic-Rating')
plt.show()


# Yearly mean 'positivity': over every release year, and restricted to 2000+.
years_pos = main_music['positivity'].groupby(main_music['released']).mean()
_since_2000 = main_music.loc[main_music['released'] >= 2000]
modern_pos = _since_2000.groupby('released')['positivity'].mean()

# One small line chart per series (modern first, then all years).
_line_charts = (
    (modern_pos, dict(color = '#FF004C', linewidth = '2',
                      label = "Positivity-Rating in Today's Music (2000-2019)")),
    (years_pos, dict(color = 'gray', linewidth = '1')),
)
for _yearly, _line_style in _line_charts:
    plt.figure(figsize = (5, 4))
    plt.plot(_yearly.index, _yearly, marker = 'H', **_line_style)
    plt.show()


# NOTE(review): x and y are Series of different lengths indexed by year; how
# they are paired depends on the installed seaborn version -- confirm the
# hexbin shows what is intended.
sns.jointplot(x = years_pos, y = modern_pos, data = main_music, kind = 'hex')
plt.title('HEX Kernal Density Estimation Plot', y=1.25)
plt.show()


# Overlay the density of the yearly means for both periods.
for _yearly, _colour, _legend_text in (
    (years_pos, '#FF004C', 'All-Time'),
    (modern_pos, '#00B7FF', "Today's Music"),
):
    sns.kdeplot(_yearly, color = _colour, label = _legend_text)
plt.xlabel('Positivity Rating')
plt.legend()
plt.show()


# Median yearly 'romantic' rating for the 1960s and for 2010 onwards.
sty_music =  main_music[(main_music['released'] >= 1960) & (main_music['released'] < 1970)].groupby('released')['romantic'].median()
tdy_music = main_music[main_music['released'] >= 2010].groupby('released')['romantic'].median()


# Pair the two eras positionally (first year with first year, and so on).
# reset_index replaces the former hard-coded range(10) index, which raised a
# length-mismatch error whenever an era did not contain exactly ten years.
kde_df = pd.DataFrame()
kde1 = sty_music.reset_index(drop = True)

kde2 = tdy_music.reset_index(drop = True)

kde_df['60s Music'] = kde1
kde_df['21st Century Music'] = kde2
kde_df

# NOTE(review): newer seaborn releases deprecate `shade` in favour of `fill`;
# confirm against the installed version before changing it.
sns.jointplot(x = '60s Music', y = '21st Century Music', data = kde_df, kind = 'kde', shade = True)
plt.title('Romantic-Rating in Music', y = 1.27, x = -2.8)
plt.show()


# Songs released in the 1960s and songs released from 2010 onwards.
s = main_music[(main_music['released'] >= 1960) & (main_music['released'] < 1970)]
t = main_music[main_music['released'] >= 2010]

# Compare the same number of top-rated songs from each era: at most 1000, but
# never more than the smaller era contains (the fixed 1000-label index used
# previously raised when an era had fewer rows).
_top_count = min(1000, len(s), len(t))

# Rank-ordered 'romantic' ratings, re-indexed 0..n-1 so both series align by rank.
sty = s['romantic'].sort_values(ascending = False).head(_top_count).reset_index(drop = True)
tdy = t['romantic'].sort_values(ascending = False).head(_top_count).reset_index(drop = True)


sns.jointplot(x = tdy, y = sty, data = main_music, kind = 'hex')
plt.xlabel("Today's Music Romantic-Rating")
plt.ylabel("Sixty's Music Romantic-Rating")
sns.set_style('dark')


plt.figure(figsize = (15, 5))

# Colour each point by the ratio of the two ratings at the same rank.
plt.scatter(sty, tdy, c = tdy/sty, cmap = 'cool', marker = 'H', alpha = 0.75)
cbar = plt.colorbar()
cbar.set_label("Today's by Sixty's Music RomRating Ratio")
plt.xlabel('RRating for Sixtys Music')
plt.ylabel('RRating for Todays Music')
plt.title('Romantic-Rating From Top Songs of the two Eras (60s and 2010s)')
plt.xticks(fontsize = '15')
plt.yticks(fontsize = '15')
plt.show()


# Yearly medians of 'violence' and 'obscene' for 1950-1969 and for 2000 onwards.
_release_year = main_music['released']
old = main_music[(_release_year >= 1950) & (_release_year < 1970)]
new = main_music[(_release_year >= 2000)]

_rating_columns = ['violence', 'obscene']
o = old.groupby(['released'])[_rating_columns].median()
n = new.groupby(['released'])[_rating_columns].median()

# One figure per period: (yearly medians, title, violence colour, obscene colour).
_period_panels = (
    (o, '1950-1969', '#FF0080', '#35A9FF'),
    (n, '2000-2019', '#FF7335', '#A335FF'),
)
for _medians, _period, _violence_colour, _obscene_colour in _period_panels:
    plt.figure(figsize = (5, 4))
    plt.plot(_medians.index, _medians['violence'], label = 'Violence',
             color = _violence_colour, marker = 'H', linewidth = '2')
    plt.plot(_medians.index, _medians['obscene'], label = 'Obscene',
             color = _obscene_colour, marker = 'H', linewidth = '2')
    plt.legend()
    plt.title(_period)
    plt.show()


def _decade_label(year):
    """Turn a release year into a decade label such as '1960s' or '2010s'."""
    digits = str(year)
    century = "19" if digits[0] == '1' else "20"
    return f"{century}{digits[2:3]}0s"


# Copy of the dataset whose 'released' column holds decade labels instead of years.
gen_music = main_music.copy(deep = True)
gen_music['released'] = gen_music['released'].apply(_decade_label)

# Mean 'shake the audience' rating per (decade, genre), pivoted to genre x decade.
pvt_df = gen_music.groupby(['released', 'genre'])['shake the audience'].mean().to_frame('STA Rating')
mtx_athand = pvt_df.pivot_table(values = 'STA Rating', index = 'genre', columns = 'released')

plt.figure(figsize = (11, 6))
sns.heatmap(mtx_athand, cmap = 'cool')
plt.xlabel('ERAs')
plt.ylabel('MUSIC GENRE')
for _set_ticks in (plt.xticks, plt.yticks):
    _set_ticks(fontsize = '13')
plt.title('STA Rating/Crowd Involvement in the songs of different Eras categorized by Genres', fontsize = '13')
plt.show()

# Same rating averaged per decade across all genres.
ovl = gen_music['shake the audience'].groupby(gen_music['released']).mean()
plt.figure(figsize = (8, 5))
plt.plot(ovl.index, ovl, color = 'blue', marker = 'H', linewidth = '2')
plt.title('Overall Comparison')
plt.ylabel('STA/Crowd Involvement')
plt.show()


# Yearly means of the 'spiritual' and 'positivity' ratings.
snp = main_music.groupby('released')[['spiritual', 'positivity']].mean()

_spiritual_line = dict(color = '#FF35AC', marker = 'H', label = 'Spiritual Rating', linewidth = '2')
_positivity_line = dict(color = '#FFAF35', marker = 'H', label = 'Positivity Rating', linewidth = '2')

# Both ratings on one chart.
plt.figure(figsize = (15, 5))
plt.plot(snp.index, snp['spiritual'], **_spiritual_line)
plt.plot(snp.index, snp['positivity'], **_positivity_line)
plt.legend()
plt.title('Spirituality & Positivity Rating')
plt.show()

# Positivity on its own chart.
plt.figure(figsize = (15, 5))
plt.plot(snp.index, snp['positivity'], **_positivity_line)
plt.legend()
plt.show()


# Mean positivity and spirituality per (decade, genre).
_era_genre_means = gen_music.groupby(['released', 'genre'])[['positivity', 'spiritual']].mean()
pos_mx = _era_genre_means.rename(columns = {'positivity': 'Positivity Rating',
                                            'spiritual': 'Spiritual Rating'})

sns.set(rc = {'figure.figsize':(8,4)})

# Genre x decade tables, one per rating.
pos_heat = pos_mx.pivot_table(values = 'Positivity Rating', index = 'genre', columns = 'released')
spi_heat = pos_mx.pivot_table(values = 'Spiritual Rating', index = 'genre', columns = 'released')

for _table, _palette, _heading in (
    (pos_heat, 'autumn', 'Positivity Rating Throught Different Eras'),
    (spi_heat, 'spring', 'Spirituality Rating Throught Different Eras'),
):
    sns.heatmap(_table, cmap = _palette, linecolor = 'white', linewidth = '1')
    plt.title(_heading)
    plt.show()


# Median 'loudness' of the songs released in each year.
year_loud = main_music['loudness'].groupby(main_music['released']).median()

_loud_line = dict(color = '#4AFF97', marker = 'H', linewidth = '2')
plt.figure(figsize = (15, 5))
plt.plot(year_loud.index, year_loud, **_loud_line)
plt.title('Loudness in Music (1950-2019)')
plt.ylabel("Loudness Rating")
plt.show()


# Genre x year table of 'sadness' for songs released from 2000 onwards
# (pivot_table's default aggregation applies).
new = main_music.loc[main_music['released'] >= 2000]
mtx = pd.pivot_table(new, values = 'sadness', index = 'genre', columns = 'released')
sns.heatmap(mtx, linewidth = '1', linecolor = 'white', cmap = 'Blues')
plt.show()


# The 50 artists with the most titles in the dataset, largest first.
top = main_music.groupby(['artist'])['title'].apply(len).sort_values(ascending = False).head(50)
plt.figure(figsize = (15, 5))
plt.bar(top.index, top, color = '#34495E', label = 'H', linewidth = '2')
# Rotation must be numeric: current matplotlib rejects the string '90'
# (only 'vertical', 'horizontal' or a number are accepted).
plt.xticks(rotation = 90)
plt.show()


# Rows whose artist name contains 'queen' / 'beatles', re-indexed 0..n-1.
# NOTE(review): substring matching also selects any other artist whose name
# contains these words -- confirm that is intended.
queen_music = main_music[main_music['artist'].str.contains('queen')].reset_index(drop = True)

beatles_music = main_music[main_music['artist'].str.contains('beatles')].reset_index(drop = True)

# Side-by-side 'shake the audience' ratings, paired by row position.
qb_sta = pd.DataFrame()
qb_sta['Queen'] = queen_music['shake the audience']
qb_sta['The Beatles'] = beatles_music['shake the audience']
sns.jointplot(x = 'Queen', y = 'The Beatles', data = qb_sta, kind = 'kde', shade = True, cmap = 'cool', cbar = True)


# Compare the top-rated songs rank by rank: at most 47, but never more than
# either artist has (the fixed 47-label index used previously raised when an
# artist had fewer songs).
_rank_count = min(47, len(queen_music), len(beatles_music))
_ranks = np.arange(1, _rank_count + 1)

q = queen_music['shake the audience'].sort_values(ascending = False).head(_rank_count)
b = beatles_music['shake the audience'].sort_values(ascending = False).head(_rank_count)
q.index = _ranks
b.index = _ranks

plt.figure(figsize = (6.4, 4))
# Colour encodes the Queen/Beatles ratio at each rank.
plt.scatter(q, b, c = (q/b), cmap = 'winter', marker = 'h')
plt.xlabel('Queen CI')
plt.ylabel('The Beatles CI')
cbar = plt.colorbar()
cbar.set_label('Queen/The Beatles (CIR)')
plt.title('Crowd Involvement In Songs : A Comparison (The Beatles Vs Queen)')
plt.show()

plt.figure(figsize = (15, 5))
plt.plot(qb_sta.index, qb_sta['Queen'], linewidth = '2', marker = 'H', color = '#35FFB2', label = 'Queen')
plt.plot(qb_sta.index, qb_sta['The Beatles'], linewidth = '2', marker = '*', color = '#35C1FF', label = 'The Beatles')
plt.legend()
plt.show()


def _streak_weeks(streak):
    """Join the all-digit tokens of *streak* into one int; NaN when there are none."""
    digits = "".join(token for token in streak.split() if token.isdigit())
    return int(digits) if digits else np.nan


def _release_decade(released):
    """Build a decade label ('19x0s' / '20x0s') from a release string.

    The century is chosen from the first character of the last
    whitespace-separated token; the decade digit is the second-to-last
    character of the whole string.
    """
    century = "19" if released.split()[-1][0] == '1' else "20"
    return f"{century}{released[-2:-1]}0s"


# Work on a copy of the top-500 table and drop rows with any missing value.
w500 = top500.copy(deep = True)
w500.dropna(inplace = True)
# Digits are now extracted once per row rather than twice.
w500['streakn'] = w500['streak'].apply(_streak_weeks)
w500['year'] = w500['released'].apply(_release_decade)


# Total streak weeks per (artist, decade). An earlier per-artist total was
# computed and immediately overwritten, so only this table is built.
astrk = pd.DataFrame()
astrk['Weeks'] = w500.groupby(['artist', 'year'])['streakn'].sum()
sns.set(rc = {'figure.figsize' : (15, 6)})
sns.set_context('paper')
# Keep the 50 largest (artist, decade) totals and lay them out as decade x artist.
strk_pvt = astrk.sort_values(by = 'Weeks', ascending = False).head(50).pivot_table(index = 'year', columns = 'artist', values = 'Weeks')
sns.heatmap(strk_pvt, cmap = 'copper',linecolor = 'gray', linewidth = '1')
plt.grid()
plt.show()


mj_music = main_music[main_music['artist'] == 'michael jackson']
wk_music = main_music[main_music['artist'] == 'the weeknd']

# Pair the two artists' 'acoustic' ratings by row position. reset_index gives
# each series a fresh 0..n-1 index; the previous code assigned a hard-coded
# index (31 / 19 labels) to a temporary column object, which raises when the
# counts differ and does not reliably carry over to the later column lookup.
rm_joint = pd.DataFrame()
rm_joint['MJ'] = mj_music["acoustic"].reset_index(drop = True)
rm_joint['TW'] = wk_music["acoustic"].reset_index(drop = True)

sns.jointplot(x = 'MJ', y = 'TW', color = 'Red', data = rm_joint, kind = 'kde', shade = True)
plt.show()


# Danceability of every song, one bar per title, coloured by artist.
plt.figure(figsize = (18, 6))
plt.style.use('fivethirtyeight')
plt.bar(mj_music['title'], mj_music['danceability'], color = '#273746', label = 'Michael Jackson')
plt.bar(wk_music['title'], wk_music['danceability'], color = '#E74C3C', label = 'The Weeknd')
plt.legend()
# Rotation must be numeric: current matplotlib rejects the string '90'.
plt.xticks(rotation = 90, fontsize = '17')
plt.ylabel('Danceability Rating')
plt.yticks(fontsize = '17')
plt.show()


# Number of songs per topic for each artist.
plt.figure(figsize = (15, 5))
plt.style.use('fivethirtyeight')
mj_music['topic'].value_counts().plot(color = '#273746', label = 'Michael Jackson', linewidth = '2', marker = 'H')
wk_music['topic'].value_counts().plot(color = '#E74C3C', label = 'The Weeknd', linewidth = '2', marker = 'x')
plt.legend()
plt.xticks(fontsize = '15')
plt.ylabel('No. of Songs')
plt.title('Most Popular Song Topics b/w Michael Jackson & The Weeknd')
plt.show()


lz = main_music[main_music['artist'] == 'led zeppelin']
tb = main_music[main_music['artist'] == 'the beatles']
tw = main_music[main_music['artist'] == 'the who']
rs = main_music[main_music['artist'] == 'the rolling stones']
pf = main_music[main_music['artist'] == 'pink floyd']


# Ratings averaged for every band, in the order they appear on the x axis.
_PROFILE_COLUMNS = ['romantic', 'obscene', 'danceability', 'sadness',
                    'positivity', 'loudness', 'acoustic', 'energy']


def _band_profile(band_songs):
    """Return the mean of each profile column for a single-artist frame.

    Columns are selected with a list: selecting several columns from a
    groupby with a bare tuple of names is no longer supported by pandas 2.x.
    """
    return band_songs.groupby('artist')[_PROFILE_COLUMNS].mean().iloc[0]


a = _band_profile(lz)
b = _band_profile(tb)
c = _band_profile(tw)
d = _band_profile(rs)
e = _band_profile(pf)

x_ind = np.arange(len(a))
w = 0.15

plt.figure(figsize = (15, 5))
# Five bars per rating, offset around each tick by whole multiples of the bar width.
for _profile, _slot, _colour, _band in (
    (a, -2, '#4CD8FF', 'Led Zeppelin'),
    (b, -1, '#75FF00', 'The Beatles'),
    (c, 0, '#5D6D7E', 'The Who'),
    (d, 1, '#A700FF', 'The Rolling Stones'),
    (e, 2, '#FF4CFC', 'Pink Floyd'),
):
    plt.bar(x_ind + _slot * w, _profile, width = w, color = _colour, label = _band)

plt.xticks(ticks = x_ind, labels = a.index, fontsize = '15')
plt.yticks(fontsize = '15')
# Single legend call: a second bare plt.legend() used to replace this one and
# silently drop the requested font size.
plt.legend(fontsize = '15')
plt.show()


# Mean 'title_length' of the songs released in each year.
eti = main_music['title_length'].groupby(main_music['released']).mean()

_title_line = dict(color = '#44FF35', linewidth = '2', marker = 'H')

plt.figure(figsize = (15, 5))
plt.style.use('fivethirtyeight')
plt.plot(eti.index, eti, **_title_line)
for _set_ticks in (plt.xticks, plt.yticks):
    _set_ticks(fontsize = '15')
plt.ylabel('No. of Words in Title')
plt.show()


def _title_initial(title):
    """Return the upper-cased first character of *title* if it is an ASCII letter, else NaN."""
    first = title[:1]
    # An empty string is "in" every string, so emptiness is checked explicitly.
    if first and first in string.ascii_letters:
        return first.upper()
    return np.nan


# Work on an explicit copy so the column assignment below never targets a
# slice of main_music.
sad_songs = main_music[main_music['topic'] == 'sadness'].copy()
sad_songs['title'] = sad_songs['title'].apply(_title_initial)
sad_songs.dropna(inplace = True)

# Count songs per initial and force all 26 letters to be present (0 when a
# letter never occurs) so the bar heights always line up with the labels.
_letters = list(string.ascii_uppercase)
_initial_counts = sad_songs['title'].value_counts().reindex(_letters, fill_value = 0)

plt.figure(figsize = (15, 4))
plt.bar(_letters, _initial_counts, color = '#FF3587')
plt.xticks(fontsize = '15')
plt.ylabel('No. of Songs')
plt.yticks(fontsize = '15')
plt.title('Starting Letters in Sad Songs (1950s - 2019)')
plt.show()


# Median 'song_length' per (decade, genre), pivoted to genre x decade.
slendf = pd.DataFrame()
slendf['No. of Words in Lyrics'] = gen_music.groupby(['released', 'genre'])['song_length'].median()

slen_pvt = slendf.pivot_table(index = 'genre', columns = 'released', values = 'No. of Words in Lyrics')

plt.figure(figsize = (8, 6))
plt.style.use('fivethirtyeight')
sns.set_context('paper')
sns.heatmap(slen_pvt, cmap = 'coolwarm', cbar = True)
plt.title('No. of Words in Lyrics in different genres (1950s-2019)', y = 1.02)
# Display the figure like the other cells do; the former argument-less
# plt.plot() drew nothing and left a stray `[]` cell output in the script.
plt.show()


# Median lyric length per release year: romantic-topic songs vs. every song.
romantic = main_music.loc[main_music['topic'] == 'romantic']
rs = romantic['song_length'].groupby(romantic['released']).median()
year_wise_lyrics = main_music['song_length'].groupby(main_music['released']).median()

_all_topics_line = dict(linestyle = '--', color = 'gray', marker = '*',
                        linewidth = '2', label = 'All-Topic Median')
_romantic_line = dict(color = '#4AFF82', linewidth = '2', marker = 'H',
                      label = 'Romantic Songs/Topic')

plt.figure(figsize =(15, 5))
plt.plot(year_wise_lyrics.index, year_wise_lyrics, **_all_topics_line)
plt.plot(rs.index, rs, **_romantic_line)
plt.legend(fontsize = '15')
for _set_ticks in (plt.xticks, plt.yticks):
    _set_ticks(fontsize = '15')
plt.title('Lyrics-Length in Romantic vs All-Topic Median')
plt.ylabel('No. of words')

plt.show()

MUSIC OF THE AGES¶

STYLING CONFIGS¶

INTRODUCTION¶

PACKAGE IMPORTS¶

DATASET IMPORTS & MATPLOTLIB CONFIGS¶

HOW DO DIFFERENT RATINGS RELATE TO THEMSELVES?¶

Let's Talk About The Overall Heatmap¶

SEGMENT - 1¶

Topic : Past & The Present¶

Questions & Topics to be discussed :¶

1. WHICH ERA OF MUSIC PRODUCED THE MOST ROMANTIC SONGS?¶

2. IS TODAY'S MUSIC MORE NEGATIVE THAN THE AVERAGE?¶

3. WERE THE 60S MORE ROMANTIC THAN TODAY?¶

4. IS TODAY'S MUSIC MORE VIOLENT AND OBSCENE THAN 50-60S?¶

5. WHICH GENERATION'S MUSIC HAD THE MOST CROWD INVOLVEMENT / STA RATING?¶

6. SEARCH FOR THE MOST SPIRITUAL & POSITIVE MUSIC GENERATION.¶

7. IS TODAY'S MUSIC LOUDER THAN YESTERDAY'S?¶

8. ANALYSIS OF SADNESS IN MODERN MUSIC :¶

SEGMENT - 2¶

Topic : Artists Against Each Other¶

Questions & Topics to be discussed :¶

1. ARTISTS WITH THE MOST POPULAR SONGS SINCE THE 1950s?¶

2. CROWD INVOLVEMENT & STA COMPARISON : THE BEATLES VS QUEEN¶

3. WHO ARE THE TOP ARTISTS WITH THE HIGHEST NUMBER OF STREAKS IN THE TOP CHARTS?¶

4. MICHAEL JACKSON VS THE WEEKND.¶

5. COMPARISONS B/W THE MOST POPULAR ROCK BANDS OF THE 60s & 70s.¶

SEGMENT - 3¶

Topic : Playing with Statistics¶

Questions & Topics to be discussed :¶

1. WHICH ERA HAD THE LONGEST SONG TITLES EVER?¶

2. WHICH STARTING LETTERS ARE MOST COMMONLY USED FOR SAD SONGS?¶

3. WHICH ERA & GENRE OF MUSIC USUALLY CONTRIBUTES TO THE MOST LENGTHY LYRICS?¶

4. ARE ROMANTIC SONGS MORE LENGTHY THAN ON AVERAGE?¶

CITATIONS & REFERENCES¶