# Notebook setup: imports, Colab file upload, and dataset load.
from google.colab import files
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns

# Colab-only: opens the browser upload dialog; `uploaded` holds the result of
# the upload call for the selected file(s).
uploaded = files.upload()

print("Environment Ready !")

# Load the dataset. NOTE(review): the doubled ".csv.csv" extension presumably
# matches the name of the uploaded file -- confirm before renaming.
df = pd.read_csv("StudentsPerformance.csv.csv")
# Show first 5 rows
df.head()

| | gender | race/ethnicity | parental level of education | lunch | test preparation course | math score | reading score | writing score |
|---|---|---|---|---|---|---|---|---|
| 0 | female | group B | bachelor's degree | standard | none | 72 | 72 | 74 |
| 1 | female | group C | some college | standard | completed | 69 | 90 | 88 |
| 2 | female | group B | master's degree | standard | none | 90 | 95 | 93 |
| 3 | male | group A | associate's degree | free/reduced | none | 47 | 57 | 44 |
| 4 | male | group C | some college | standard | none | 76 | 78 | 75 |
# Tally missing values per column; a column showing 0 has no gaps.
df.isna().sum()
| | 0 |
|---|---|
| gender | 0 |
| race/ethnicity | 0 |
| parental level of education | 0 |
| lunch | 0 |
| test preparation course | 0 |
| math score | 0 |
| reading score | 0 |
| writing score | 0 |
# Print the frame summary (columns, dtypes, non-null counts).
df.info()
# Sanity check: count entries below zero in each of the three score columns.
df[["math score", "reading score", "writing score"]].lt(0).sum()
| | 0 |
|---|---|
| math score | 0 |
| reading score | 0 |
| writing score | 0 |
# Mean of each subject's score column, unpacked in math / reading / writing order.
avg_math, avg_read, avg_write = (
    df[column].mean()
    for column in ("math score", "reading score", "writing score")
)

# Same three means keyed by display-friendly labels.
averages = {
    "Math Average": avg_math,
    "Reading Average": avg_read,
    "Writing Average": avg_write,
}
# Display the per-subject averages.
averages
# Recorded output:
# {'Math Average': np.float64(66.089),
#  'Reading Average': np.float64(69.169),
#  'Writing Average': np.float64(68.054)}

# Combined score across the three subjects for every student.
df["total_score"] = (
    df["math score"] + df["reading score"] + df["writing score"]
)

# The five rows with the highest combined score, best first.
top_students = df.sort_values(by="total_score", ascending=False).head(5)
top_students
| | gender | race/ethnicity | parental level of education | lunch | test preparation course | math score | reading score | writing score | total_score |
|---|---|---|---|---|---|---|---|---|---|
| 916 | male | group E | bachelor's degree | standard | completed | 100 | 100 | 100 | 300 |
| 962 | female | group E | associate's degree | standard | none | 100 | 100 | 100 | 300 |
| 458 | female | group E | bachelor's degree | standard | none | 100 | 100 | 100 | 300 |
| 114 | female | group E | bachelor's degree | standard | completed | 99 | 100 | 100 | 299 |
| 712 | female | group D | some college | standard | none | 98 | 100 | 99 | 297 |
# --- Distribution of math scores -------------------------------------------
# Histogram with a kernel-density curve overlaid (kde=True).
plt.figure()
sns.histplot(df["math score"], kde=True)
plt.title("Distribution of Math Scores")
plt.show()

# --- Average subject scores by gender --------------------------------------
gender_avg = df.groupby("gender")[
    ["math score", "reading score", "writing score"]
].mean()
gender_avg.plot(kind="bar")
plt.title("Average Scores by Gender")
plt.show()

# --- Parental education vs. mean total score --------------------------------
# Uses the "total_score" column, so that column must already exist on df.
# Sorted ascending before plotting as horizontal bars.
parent_avg = (
    df.groupby("parental level of education")["total_score"]
    .mean()
    .sort_values()
)
parent_avg.plot(kind="barh")
plt.title("Parental Education vs Student Performance")
plt.xlabel("Average Total Score")
plt.show()

# --- Average subject scores by race/ethnicity group -------------------------
race_avg = df.groupby("race/ethnicity")[
    ["math score", "reading score", "writing score"]
].mean()
race_avg.plot(kind="bar")
plt.title("Race/Ethnicity vs Average Scores")
plt.show()

# --- Correlation between the three subject scores ---------------------------
# annot=True writes each correlation coefficient inside its heatmap cell.
sns.heatmap(
    df[["math score", "reading score", "writing score"]].corr(),
    annot=True,
)
plt.title("Correlation Between Subjects")
plt.show()