DBSCAN聚类

DBSCAN聚类


一、目标

编程实现 DBSCAN 对所给数据的聚类分析:

smile,moon,long,spiral,sizes5,2d-4c,square1,square4

二、算法思想

任意选取一个点p,得到所有从p关于Eps(邻域最大半径)和MinPts(邻域最少点数)密度可达的点。如果p是一个核心点,则找到一个聚类。如果p是一个边界点,没有从p密度可达的点,DBSCAN将访问数据库中的下一个点。

不同数据集的 Eps、MinPts 取值如下:

Name Eps MinPts
Smile 0.05 8
Moon 0.03 5
Long 0.2 5
Spiral 0.6 10
Sizes5 1.5 12
2d-4c 1 15
Square1 1 10
Square4 1.5 15

三、核心代码

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
import matplotlib.pyplot as plt
import scipy.io as sio
import numpy as np

def inrange(pointIdx, eps):
    """Return the indices of all points lying within ``eps`` of one point.

    The parameters are ordered ``(pointIdx, eps)`` because that is how the
    callers in this file (``expand`` and ``DBSCAN``) pass them positionally;
    the previous ``(eps, p)`` signature left ``pointIdx`` undefined in the
    body.

    Relies on two module-level names that must exist when this is called:
    ``dataSet`` (the indexable, iterable collection of points) and
    ``distance`` (a two-argument callable returning the distance between
    two points).

    Args:
        pointIdx: Index into ``dataSet`` of the query point.
        eps: Neighbourhood radius; the comparison is inclusive (``<=``).

    Returns:
        A list of indices into ``dataSet``, in ascending order, of every
        point whose distance to the query point is at most ``eps``.
    """
    point = dataSet[pointIdx]
    return [idx for idx, pt in enumerate(dataSet) if distance(point, pt) <= eps]

def expand(p, inP, eps, minPts):
    """Grow a cluster outward from the core point ``p``.

    Starting from the neighbours of ``p``, every reachable point is examined
    once.  A not-yet-visited point is marked visited and, if it has at least
    ``minPts`` neighbours within ``eps``, its neighbours are queued for
    examination too.  Every examined point that does not already belong to a
    cluster is added to this one and has its noise flag cleared.

    Reads and writes the module-level flag containers ``visited``,
    ``classfied`` and ``noise`` (indexed by point index) and calls
    ``inrange`` for neighbourhood queries.

    Args:
        p: Index of the seed point of the cluster.
        inP: Indices of the points in the eps-neighbourhood of ``p``.
            The list is not modified.
        eps: Neighbourhood radius forwarded to ``inrange``.
        minPts: Minimum neighbourhood size for a point to spread the cluster.

    Returns:
        A list of point indices belonging to the new cluster, beginning
        with ``p``.
    """
    label = [p]
    classfied[p] = True

    # Append-only work list plus a membership set.  The earlier version
    # cleared and refilled ``inP`` from a set while a ``for`` loop was
    # iterating over it, which reshuffles the elements under the iterator
    # and can skip or repeat neighbours.
    pending = list(inP)
    queued = set(pending)
    cursor = 0
    while cursor < len(pending):
        p2 = pending[cursor]
        cursor += 1

        if not visited[p2]:
            visited[p2] = True
            inP2 = inrange(p2, eps)
            if len(inP2) >= minPts:
                # p2 has enough neighbours, so they become candidates too.
                for q in inP2:
                    if q not in queued:
                        queued.add(q)
                        pending.append(q)

        if not classfied[p2]:
            label.append(p2)
            classfied[p2] = True
            # A point flagged as noise earlier is reclaimed once a cluster
            # reaches it.
            if noise[p2]:
                noise[p2] = False
    return label

def DBSCAN(data, eps=1, minPts=5):
    """Cluster the points with DBSCAN and return the clusters found.

    Each unvisited point is marked visited and its eps-neighbourhood is
    fetched with ``inrange``.  With fewer than ``minPts`` neighbours the
    point is flagged as noise (``expand`` may later reclaim it); otherwise
    ``expand`` grows a new cluster from it.

    Reads and writes the module-level flag containers ``visited`` and
    ``noise`` (``classfied`` is updated through ``expand``).
    NOTE(review): these containers are not initialised anywhere in this
    listing -- the caller is presumably expected to create them, one False
    entry per point, before calling; confirm.

    Args:
        data: The points to cluster.  Only used here to enumerate point
            indices; neighbourhood lookups go through ``inrange``.
        eps: Neighbourhood radius.
        minPts: Minimum neighbourhood size for a point to seed a cluster.

    Returns:
        A list of clusters, each a list of point indices as produced by
        ``expand``.
    """
    labels = []
    # A single pass suffices: once this loop ends every index has been
    # marked visited, so a repeat scan could never do any work.  The former
    # ``while not flag`` wrapper therefore either ran exactly once or, if
    # some point was left neither classified nor noise, spun forever.
    for p, _ in enumerate(data):
        if visited[p]:
            continue
        visited[p] = True
        inP = inrange(p, eps)
        if len(inP) < minPts:
            noise[p] = True
        else:
            labels.append(expand(p, inP, eps, minPts))
    return labels

# Driver: load the "smile" data set, cluster it with the DBSCAN function
# defined in this file, and draw one scatter series per cluster plus noise.
datapath = "C:\\Users\\Administrator\\Desktop\\DATA\\smile"
data = sio.loadmat(datapath)
Data = data['smile'][:, [0, 1]]  # keep the first two columns as x / y

eps = 0.05
minPts = 8

# Module-level state read by inrange / expand / DBSCAN.  It has to be set
# up before clustering starts: one flag per point, all initially False.
# NOTE(review): inrange also calls a `distance` helper that is not defined
# in this listing; it must be provided before this script can run.
dataSet = Data
visited = [False] * len(Data)
classfied = [False] * len(Data)
noise = [False] * len(Data)

# DBSCAN is a plain function here (not a module or class) and returns a
# list of clusters, each a list of point indices.
clusters = DBSCAN(Data, eps, minPts)

# Per-point result table: column 0 = point index, column 1 = cluster id,
# with -1 meaning the point was not assigned to any cluster (noise).
fin = np.full((len(Data), 2), -1, dtype=int)
fin[:, 0] = np.arange(len(Data))
for clusterId, members in enumerate(clusters):
    fin[members, 1] = clusterId
# Count clusters directly instead of "unique labels minus one", which
# silently dropped a cluster whenever the data contained no noise.
num = len(clusters)

fig = plt.figure(1)
ax = fig.add_subplot(111)
color = ['b', 'r', 'c', 'g']
for i in range(num):
    label = Data[fin[:, 1] == i]
    xs = label[:, 0]
    ys = label[:, 1]
    # Cycle through the palette so more than four clusters cannot raise
    # an IndexError.
    ax.scatter(xs, ys, c=color[i % len(color)], label="label" + str(i + 1))
# Separate name so the `noise` flag list used by the algorithm is not
# overwritten by the plotted coordinates.
noisePoints = Data[fin[:, 1] == -1]
noisePlot = ax.scatter(noisePoints[:, 0], noisePoints[:, 1], c='y', alpha=0.7, label="noise")
ax.legend()
plt.grid()
plt.show()

四、结果图像

  1. Smile

    Smile

  2. Moon

    Moon

  3. Long

    Long

  4. Spiral

    Spiral

  5. Sizes5

    Sizes5

  6. 2d-4c

    2d-4c

  7. Square1

    Square1

  8. Square4

    Square4