Differentiating between real and fake

Sam Hopkins

Initialize workspace and read data

#!/usr/bin/python
# Written by Sam Hopkins
import matplotlib.pyplot as plt
import numpy as np
import pandas as pnd
import math

data = pnd.read_csv("data_banknote_authentication.csv")
print(data.head())
##    variance  skewness  kurtosis  entropy  counterfeit
## 0   3.62160    8.6661   -2.8073 -0.44699            0
## 1   4.54590    8.1674   -2.4586 -1.46210            0
## 2   3.86600   -2.6383    1.9242  0.10645            0
## 3   3.45660    9.5228   -4.0112 -3.59440            0
## 4   0.32924   -4.4552    4.5718 -0.98880            0

Calculate mean and SD of each attribute for real and counterfeit bills

real = data[data['counterfeit'] == 0]
counterfeit = data[data['counterfeit'] == 1]

uR = [real['variance'].mean(), real['skewness'].mean(), real['kurtosis'].mean(), real['entropy'].mean()]
uF = [counterfeit['variance'].mean(), counterfeit['skewness'].mean(), counterfeit['kurtosis'].mean(), counterfeit['entropy'].mean()]
sR = [real['variance'].std(), real['skewness'].std(), real['kurtosis'].std(), real['entropy'].std()]
sF = [counterfeit['variance'].std(), counterfeit['skewness'].std(), counterfeit['kurtosis'].std(), counterfeit['entropy'].std()]

Predict whether each banknote is real or counterfeit using distance from means

predictions0 = []
for i, x in data.iterrows():
  distReal = np.sqrt((x['variance'] - uR[0])**2 + (x['skewness'] - uR[1])**2 + (x['kurtosis'] - uR[2])**2 + (x['entropy'] - uR[3])**2)
  distFake = np.sqrt((x['variance'] - uF[0])**2 + (x['skewness'] - uF[1])**2 + (x['kurtosis'] - uF[2])**2 + (x['entropy'] - uF[3])**2)
  predictions0.append(1 if distReal > distFake else 0)

colors0 = []
hits0 = []
for i, x in data.iterrows():
  if x['counterfeit'] == 0 and predictions0[i] == 0:
    hits0.append(1)
    colors0.append("#8888ff")
  elif x['counterfeit'] == 1 and predictions0[i] == 1:
    hits0.append(1)
    colors0.append("#ff8888")
  elif x['counterfeit'] == 1 and predictions0[i] == 0:
    hits0.append(0)
    colors0.append("#ffcccc")
  else:
    hits0.append(0)
    colors0.append("#ccccff")

accuracy0 = 100*sum(hits0)/len(hits0)
acc0Str = "{:.2f}".format(accuracy0)

Plots

plt.scatter(data['variance'], data['skewness'], c=colors0)
plt.text(min(data['variance']) + 2, max(data['skewness']) + 3, "variance vs skewness accuracy " + acc0Str + "%", fontsize=12, verticalalignment='center')

plt.scatter(uR[0], uR[1], color="black", marker="x")
plt.text(uR[0]-1, uR[1]-1, 'Real', fontsize=12, verticalalignment='center')

plt.scatter(uF[0], uF[1], color="black", marker="x")
plt.text(uF[0]-1, uF[1]-1, 'Fake', fontsize=12, verticalalignment='center')

plt.show()

plt.scatter(data['variance'], data['kurtosis'], c=colors0)
plt.text(min(data['variance']) + 2, max(data['kurtosis']) + 3, "variance vs kurtosis accuracy " + acc0Str + "%", fontsize=12, verticalalignment='center')

plt.scatter(uR[0], uR[2], color="black", marker="x")
plt.text(uR[0]-1, uR[2]-1, 'Real', fontsize=12, verticalalignment='center')

plt.scatter(uF[0], uF[2], color="black", marker="x")
plt.text(uF[0]-1, uF[2]-1, 'Fake', fontsize=12, verticalalignment='center')

plt.show()

plt.scatter(data['variance'], data['entropy'], c=colors0)
plt.text(min(data['variance']) + 1, max(data['entropy']) + 1, "variance vs entropy accuracy " + acc0Str + "%", fontsize=12, verticalalignment='center')

plt.scatter(uR[0], uR[3], color="black", marker="x")
plt.text(uR[0]-1, uR[3]-1, 'Real', fontsize=12, verticalalignment='center')

plt.scatter(uF[0], uF[3], color="black", marker="x")
plt.text(uF[0]-1, uF[3]-1, 'Fake', fontsize=12, verticalalignment='center')

plt.show()

plt.scatter(data['skewness'], data['kurtosis'], c=colors0)
plt.text(min(data['skewness']) + 3, max(data['kurtosis']) + 2, "skewness vs kurtosis accuracy " + acc0Str + "%", fontsize=12, verticalalignment='center')

plt.scatter(uR[1], uR[2], color="black", marker="x")
plt.text(uR[1]-1, uR[2]-1, 'Real', fontsize=12, verticalalignment='center')

plt.scatter(uF[1], uF[2], color="black", marker="x")
plt.text(uF[1]-1, uF[2]-1, 'Fake', fontsize=12, verticalalignment='center')

plt.show()

plt.scatter(data['skewness'], data['entropy'], c=colors0)
plt.text(min(data['skewness']) + 2, max(data['entropy']) + 1, "skewness vs entropy accuracy " + acc0Str + "%", fontsize=12, verticalalignment='center')

plt.scatter(uR[1], uR[3], color="black", marker="x")
plt.text(uR[1]-1, uR[3]-1, 'Real', fontsize=12, verticalalignment='center')

plt.scatter(uF[1], uF[3], color="black", marker="x")
plt.text(uF[1]-1, uF[3]-1, 'Fake', fontsize=12, verticalalignment='center')

plt.show()

plt.scatter(data['kurtosis'], data['entropy'], c=colors0)
plt.text(min(data['kurtosis']) + 2, max(data['entropy']) + 1, "kurtosis vs entropy accuracy " + acc0Str + "%", fontsize=12, verticalalignment='center')

plt.scatter(uR[2], uR[3], color="black", marker="x")
plt.text(uR[2]-1, uR[3]-1, 'Real', fontsize=12, verticalalignment='center')

plt.scatter(uF[2], uF[3], color="black", marker="x")
plt.text(uF[2]-1, uF[3]-1, 'Fake', fontsize=12, verticalalignment='center')

plt.show()

Predict whether each banknote is real or counterfeit using gaussian probability density classifier

predictions1 = []
for i, x in data.iterrows():
  ea1 = (-0.5) * ((x['variance'] - uR[0]) / sR[0])**2
  pRealV = (1/(sR[0] * math.sqrt(2*math.pi))) * math.exp(ea1)

  ea2 = (-0.5) * ((x['skewness'] - uR[1]) / sR[1])**2
  pRealS = (1/(sR[1] * math.sqrt(2*math.pi))) * math.exp(ea2)

  ea3 = (-0.5) * ((x['kurtosis'] - uR[2]) / sR[2])**2
  pRealK = (1/(sR[2] * math.sqrt(2*math.pi))) * math.exp(ea3)

  ea4 = (-0.5) * ((x['entropy'] - uR[3]) / sR[3])**2
  pRealE = (1/(sR[3] * math.sqrt(2*math.pi))) * math.exp(ea4)

  eb1 = (-0.5) * ((x['variance'] - uF[0]) / sF[0])**2
  pFakeV = (1/(sF[0] * math.sqrt(2*math.pi))) * math.exp(eb1)

  eb2 = (-0.5) * ((x['skewness'] - uF[1]) / sF[1])**2
  pFakeS = (1/(sF[1] * math.sqrt(2*math.pi))) * math.exp(eb2)

  eb3 = (-0.5) * ((x['kurtosis'] - uF[2]) / sF[2])**2
  pFakeK = (1/(sF[2] * math.sqrt(2*math.pi))) * math.exp(eb3)

  eb4 = (-0.5) * ((x['entropy'] - uF[3]) / sF[3])**2
  pFakeE= (1/(sF[3] * math.sqrt(2*math.pi))) * math.exp(eb4)

  pReal = pRealV * pRealS * pRealE * pRealK
  pFake = pFakeV * pFakeS * pFakeE * pFakeK
  predictions1.append(1 if pReal < pFake else 0)

colors1 = []
hits1 = []
for i, x in data.iterrows():
  if x['counterfeit'] == 0 and predictions1[i] == 0:
    hits1.append(1)
    colors1.append("#8888ff")
  elif x['counterfeit'] == 1 and predictions1[i]  == 1:
    hits1.append(1)
    colors1.append("#ff8888")
  elif x['counterfeit'] == 1 and predictions1[i]  == 0:
    hits1.append(0)
    colors1.append("#ffcccc")
  else:
    hits1.append(0)
    colors1.append("#ccccff")

accuracy1 = 100*sum(hits1)/len(hits1)
acc1Str = "{:.2f}".format(accuracy1)

Plots

plt.scatter(data['variance'], data['skewness'], c=colors1)
plt.text(min(data['variance']) + 2, max(data['skewness']) + 3, "variance vs skewness accuracy " + acc1Str + "%", fontsize=12, verticalalignment='center')

plt.scatter(uR[0], uR[1], color="black", marker="x")
plt.text(uR[0]-1, uR[1]-1, 'Real', fontsize=12, verticalalignment='center')

plt.scatter(uF[0], uF[1], color="black", marker="x")
plt.text(uF[0]-1, uF[1]-1, 'Fake', fontsize=12, verticalalignment='center')

plt.show()

plt.scatter(data['variance'], data['kurtosis'], c=colors1)
plt.text(min(data['variance']) + 2, max(data['kurtosis']) + 3, "variance vs kurtosis accuracy " + acc1Str + "%", fontsize=12, verticalalignment='center')

plt.scatter(uR[0], uR[2], color="black", marker="x")
plt.text(uR[0]-1, uR[2]-1, 'Real', fontsize=12, verticalalignment='center')

plt.scatter(uF[0], uF[2], color="black", marker="x")
plt.text(uF[0]-1, uF[2]-1, 'Fake', fontsize=12, verticalalignment='center')

plt.show()

plt.scatter(data['variance'], data['entropy'], c=colors1)
plt.text(min(data['variance']) + 1, max(data['entropy']) + 1, "variance vs entropy accuracy " + acc1Str + "%", fontsize=12, verticalalignment='center')

plt.scatter(uR[0], uR[3], color="black", marker="x")
plt.text(uR[0]-1, uR[3]-1, 'Real', fontsize=12, verticalalignment='center')

plt.scatter(uF[0], uF[3], color="black", marker="x")
plt.text(uF[0]-1, uF[3]-1, 'Fake', fontsize=12, verticalalignment='center')

plt.show()

plt.scatter(data['skewness'], data['kurtosis'], c=colors1)
plt.text(min(data['skewness']) + 3, max(data['kurtosis']) + 2, "skewness vs kurtosis accuracy " + acc1Str + "%", fontsize=12, verticalalignment='center')

plt.scatter(uR[1], uR[2], color="black", marker="x")
plt.text(uR[1]-1, uR[2]-1, 'Real', fontsize=12, verticalalignment='center')

plt.scatter(uF[1], uF[2], color="black", marker="x")
plt.text(uF[1]-1, uF[2]-1, 'Fake', fontsize=12, verticalalignment='center')

plt.show()

plt.scatter(data['skewness'], data['entropy'], c=colors1)
plt.text(min(data['skewness']) + 2, max(data['entropy']) + 1, "skewness vs entropy accuracy " + acc1Str + "%", fontsize=12, verticalalignment='center')

plt.scatter(uR[1], uR[3], color="black", marker="x")
plt.text(uR[1]-1, uR[3]-1, 'Real', fontsize=12, verticalalignment='center')

plt.scatter(uF[1], uF[3], color="black", marker="x")
plt.text(uF[1]-1, uF[3]-1, 'Fake', fontsize=12, verticalalignment='center')

plt.show()

plt.scatter(data['kurtosis'], data['entropy'], c=colors1)
plt.text(min(data['kurtosis']) + 2, max(data['entropy']) + 1, "kurtosis vs entropy accuracy " + acc1Str + "%", fontsize=12, verticalalignment='center')

plt.scatter(uR[2], uR[3], color="black", marker="x")
plt.text(uR[2]-1, uR[3]-1, 'Real', fontsize=12, verticalalignment='center')

plt.scatter(uF[2], uF[3], color="black", marker="x")
plt.text(uF[2]-1, uF[3]-1, 'Fake', fontsize=12, verticalalignment='center')

plt.show()