Noora_Aulia_Hidayat/TestPython/answer_chapter3_experiments4.py
2024-12-31 11:29:14 +07:00

40 lines
1.3 KiB
Python

import pandas as pd
# Remote location of the purchases CSV that data_load() reads
# (replace with your own data source).
url = "https://raw.githubusercontent.com/noora20FH/skripsi_noora2023/main/purchases.csv"
# Load the purchases CSV into a pandas DataFrame.
def data_load(source=None):
    """Read the purchases CSV and return it as a DataFrame.

    Args:
        source: Optional path, URL, or file-like object to hand to
            ``pd.read_csv``. When omitted, the module-level ``url`` is used.

    Returns:
        The DataFrame produced by ``pd.read_csv``.
    """
    if source is None:
        # Looked up at call time so a later reassignment of ``url`` is honoured.
        source = url
    return pd.read_csv(source)
# Show the first five rows of the dataset.
def head_rows(df=None):
    """Return the first five rows of the purchases data.

    Args:
        df: Optional already-loaded DataFrame. When omitted, the data is
            fetched with ``data_load()``.

    Returns:
        A DataFrame holding at most five rows, in their original order.
    """
    if df is None:
        df = data_load()
    # ``head`` returns the leading rows deterministically and tolerates
    # frames shorter than five rows; ``sample(5)`` picks random rows and
    # raises ValueError when fewer than five rows are available.
    return df.head(5)
# Descriptive statistics
# Print summary statistics for each variable (age and total spent).
def _print_summary(heading, series):
    """Print mean, median, std, skewness and quartiles of *series* under *heading*."""
    print(heading)
    print(f" Mean: {series.mean()}")
    print(f" Median: {series.median()}")
    print(f" Standard Deviation: {series.std()}")
    print(f" Skewness: {series.skew()}")
    print(f" Quartiles: {series.quantile([0.25, 0.5, 0.75])}")


# Fetch the dataset once and reuse it: every data_load() call re-reads the
# CSV from its source, so calling it per statistic repeats the same download.
_purchases = data_load()
_print_summary("Customer Age:", _purchases["Age"])
_print_summary("\nTotal Spent:", _purchases["Total Spent (USD)"])
# Calculate the correlation coefficient between customer age and total spent
def correlation(df=None):
    """Return the correlation between ``Age`` and ``Total Spent (USD)``.

    Args:
        df: Optional already-loaded DataFrame containing both columns.
            When omitted, the data is fetched with ``data_load()``.

    Returns:
        The coefficient computed by ``Series.corr`` with its default method.
    """
    if df is None:
        # Load a single frame so both columns come from the same read
        # instead of fetching the CSV once per column.
        df = data_load()
    return df["Age"].corr(df["Total Spent (USD)"])
# Report the Age / Total Spent (USD) correlation returned by correlation().
print(f"Correlation Coefficient: {correlation()}")