40 lines
1.3 KiB
Python
40 lines
1.3 KiB
Python
import pandas as pd
|
|
|
|
# URL of the CSV file containing the customer purchase data (replace with your actual data source)
|
|
|
|
url = "https://raw.githubusercontent.com/noora20FH/skripsi_noora2023/main/purchases.csv"
|
|
|
|
# Load the CSV at `url` into a pandas DataFrame
|
|
|
|
def data_load():
    """Read the CSV referenced by the module-level ``url`` into a DataFrame.

    Nothing is cached: every call invokes ``pd.read_csv`` again.

    Returns:
        The DataFrame produced by ``pd.read_csv(url)``.
    """
    return pd.read_csv(filepath_or_buffer=url)
|
|
|
|
# Return five randomly sampled rows (uses `sample`, not `head`, so these are not the first five)
|
|
def head_rows():
    """Return five rows drawn at random from the loaded dataset.

    Despite the name, this uses ``DataFrame.sample`` rather than ``head``,
    so the rows are not the first five of the file.

    Returns:
        A DataFrame holding the five sampled rows.
    """
    frame = data_load()
    return frame.sample(n=5)
|
|
# Descriptive Statistics
|
|
|
|
|
|
# Print additional statistics for each variable (age and total spent)
|
|
# data_load() re-reads the remote CSV on every call, so fetch the data a
# single time here and reuse the same frame for every statistic below.
_purchases = data_load()

# (heading printed before the statistics, column the statistics are taken from)
_REPORT_SECTIONS = (
    ("Customer Age:", "Age"),
    ("\nTotal Spent:", "Total Spent (USD)"),
)

for _heading, _column in _REPORT_SECTIONS:
    # Look the column up inside the loop so a missing column fails only
    # when its own section is reached.
    _values = _purchases[_column]
    print(_heading)
    print(f" Mean: {_values.mean()}")
    print(f" Median: {_values.median()}")
    print(f" Standard Deviation: {_values.std()}")
    print(f" Skewness: {_values.skew()}")
    print(f" Quartiles: {_values.quantile([0.25, 0.5, 0.75])}")
|
|
|
|
|
|
# Calculate the correlation coefficient between customer age and total spent
|
|
def correlation():
    """Return the correlation between customer age and total amount spent.

    The dataset is loaded once so that both columns come from the same
    frame and the CSV is fetched only a single time per call.

    Returns:
        The result of ``Series.corr`` between the "Age" and
        "Total Spent (USD)" columns (pandas defaults to the Pearson method).

    Raises:
        KeyError: If either column is missing from the loaded data.
    """
    purchases = data_load()
    return purchases["Age"].corr(purchases["Total Spent (USD)"])
|
|
# Compute the age / total-spent correlation, then report it.
_age_spend_correlation = correlation()
print(f"Correlation Coefficient: {_age_spend_correlation}")
|