"""Descriptive statistics and age/spend correlation for customer purchases.

Run as a script to print the summary; import it to reuse the helpers without
triggering any download or output.
"""

from typing import Optional

import pandas as pd

# Location of the purchases CSV (replace with your actual data source).
url = "https://raw.githubusercontent.com/noora20FH/skripsi_noora2023/main/purchases.csv"

# Column names the analysis below relies on.
_AGE_COLUMN = "Age"
_SPENT_COLUMN = "Total Spent (USD)"


def data_load(source=None) -> pd.DataFrame:
    """Read the purchases CSV into a DataFrame.

    Args:
        source: Anything ``pandas.read_csv`` accepts (path, URL, file-like
            object). When omitted, the module-level ``url`` is used.

    Returns:
        The parsed CSV as a new DataFrame. Nothing is cached: every call
        reads the source again.
    """
    return pd.read_csv(url if source is None else source)


def head_rows(df: Optional[pd.DataFrame] = None) -> pd.DataFrame:
    """Return five randomly sampled rows of the data.

    Despite the name, this uses ``DataFrame.sample`` rather than ``head``,
    so the rows differ between calls.

    Args:
        df: Data to sample from. When omitted, it is loaded via ``data_load``.

    Raises:
        ValueError: Raised by pandas if the data has fewer than five rows.
    """
    frame = data_load() if df is None else df
    return frame.sample(5)


def _print_column_stats(title: str, series: pd.Series) -> None:
    """Print mean, median, std, skewness and quartiles of *series*."""
    print(title)
    print(f" Mean: {series.mean()}")
    print(f" Median: {series.median()}")
    print(f" Standard Deviation: {series.std()}")
    print(f" Skewness: {series.skew()}")
    print(f" Quartiles: {series.quantile([0.25, 0.5, 0.75])}")


def correlation(df: Optional[pd.DataFrame] = None):
    """Return the correlation coefficient between age and total spent.

    Uses ``Series.corr`` with its default method.

    Args:
        df: Data holding the "Age" and "Total Spent (USD)" columns. When
            omitted, it is loaded once via ``data_load``.
    """
    frame = data_load() if df is None else df
    return frame[_AGE_COLUMN].corr(frame[_SPENT_COLUMN])


def main(df: Optional[pd.DataFrame] = None) -> None:
    """Print descriptive statistics for both columns and their correlation.

    Args:
        df: Data to summarise. When omitted, it is loaded via ``data_load``.
    """
    # Load once and reuse: each data_load() call re-reads the remote CSV.
    frame = data_load() if df is None else df
    _print_column_stats("Customer Age:", frame[_AGE_COLUMN])
    _print_column_stats("\nTotal Spent:", frame[_SPENT_COLUMN])
    print(f"Correlation Coefficient: {correlation(frame)}")


if __name__ == "__main__":
    main()