DBSCAN算法的java实现

现在的位置: 首页 > 综合 > 正文

DBSCAN算法的java实现

2012年06月23日 ⁄ 综合 ⁄ 共 4680字 ⁄ 字号小中大 ⁄ 评论关闭

DBSCAN (Density-Based Spatial Clustering of Applications with Noise) is a data clustering algorithm proposed by Martin Ester, Hans-Peter Kriegel, Jörg Sander and Xiaowei Xu in 1996. It is a density based clustering algorithm because it finds a number of clusters starting from the estimated density distribution of corresponding nodes. DBSCAN is one of the most common clustering algorithms and also most cited in scientific literature.

DBSCAN是一种基于密度的聚类算法，它的基本原理是给定两个参数ε和minp：ε可以理解为半径，算法将以每个样本点为圆心、在这个半径内查找样本；minp是对以ε为半径查找到的样本个数n的限制条件，只要n>=minp，作为圆心的那个样本点就是核心样本点。算法的具体描述见参考文件1，下边是这个算法的Java实现：

首先定义一个Point类，代表样本点：

  1: package com.sunzhenxing;

  2: 

  3: public class Point {

  4:   private int x;

  5:   private int y;

  6:   private boolean isKey;

  7:   private boolean isClassed;

  8:   

  9:   public boolean isKey() {

 10:     return isKey;

 11:   }

 12:   public void setKey(boolean isKey) {

 13:     this.isKey = isKey;

 14:     this.isClassed=true;

 15:   }

 16:   public boolean isClassed() {

 17:     return isClassed;

 18:   }

 19:   public void setClassed(boolean isClassed) {

 20:     this.isClassed = isClassed;

 21:   }

 22:   public int getX() {

 23:     return x;

 24:   }

 25:   public void setX(int x) {

 26:     this.x = x;

 27:   }

 28:   public int getY() {

 29:     return y;

 30:   }

 31:   public void setY(int y) {

 32:     this.y = y;

 33:   }

 34:   

 35:   public Point(){

 36:     x=0;

 37:     y=0;

 38:   }

 39:   public Point(int x,int y){

 40:     this.x=x;

 41:     this.y=y;

 42:   }

 43:   public Point(String str){

 44:     String[] p=str.split(",");

 45:     this.x=Integer.parseInt(p[0]);

 46:     this.y=Integer.parseInt(p[1]);

 47:   }

 48:   public String print(){

 49:     return "<"+this.x+","+this.y+">";

 50:   }

 51: }

 52: 


然后定义一个工具类，为算法的实现服务： 
 
  
  
  
  
  
  
  
   9: 
  10: 
  11: 
  12: 
  13: 
  14: 
  15: 
  16: 
  17: 
  18: 
  19: 
  20: 
  21: 
  22: 
  23: 
  24: 
  25: 
  26: 
  27: 
  28: 
  29: 
  30: 
  31: 
  32: 
  33: 
  34: 
  35: 
  36: 
  37: 
  38: 
  39: 
  40: 
  41: 
  42: 
  43: 
  44: 
  45: 
  46: 
  47: 
  48: 
  49: 
  50: 
  51: 
  52: 
  53: 
  54: 
  55: 
  56: 
  57: 
  58: 
  59: 
  60: 
  61: 
  62: 
  63: 
  64: 
  65: 
  66: 
  67: 
  68: 
  69: 
  70: 
  71: 
  72: 
  73: 
  74: 
  75: 
  76: 
  77: 
  78: 
  79: 
  80: 
  81: 
  82: 
  83: 
  84: 
  
  工具类Utility的代码如下，随后是实现算法的主程序Dbscan：

1: package com.sunzhenxing; 2: 3: import java.io.BufferedReader; 4: import java.io.FileReader; 5: import java.io.IOException; 6: import java.util.*; 7: 8: public final class Utility { //计算两点之间的距离 public static double getDistance(Point p,Point q){ int dx=p.getX()-q.getX(); int dy=p.getY()-q.getY(); double distance=Math.sqrt(dx*dx+dy*dy); return distance; } //检测p点是不是核心点，tmpLst存储核心点的直达点 public static List<Point> isKeyPoint(List<Point> lst,Point p,int e,int minp){ int count=0; List<Point> tmpLst=new ArrayList<Point>(); for(Iterator<Point> it=lst.iterator();it.hasNext();){ Point q=it.next(); if(getDistance(p,q)<=e){ ++count; if(!tmpLst.contains(q)){ tmpLst.add(q); } } } if(count>=minp){ p.setKey(true); return tmpLst; } return null; } //合并两个链表，前提是b中的核心点包含在a中 public static boolean mergeList(List<Point> a,List<Point> b){ boolean merge=false; if(a==null || b==null){ return false; } for(int index=0;index<b.size();++index){ Point p=b.get(index); if(p.isKey() && a.contains(p)){ merge=true; break; } } if(merge){ for(int index=0;index<b.size();++index){ if(!a.contains(b.get(index))){ a.add(b.get(index)); } } } return merge; } //获取文本中的样本点集合 public static List<Point> getPointsList() throws IOException{ List<Point> lst=new ArrayList<Point>(); String txtPath="src\\com\\sunzhenxing\\points.txt"; BufferedReader br=new BufferedReader(new FileReader(txtPath)); String str=""; while((str=br.readLine())!=null && str!=""){ lst.add(new Point(str)); } br.close(); return lst; } //显示聚类的结果 public static void display(List<List<Point>> resultList){ int index=1; for(Iterator<List<Point>> it=resultList.iterator();it.hasNext();){ List<Point> lst=it.next(); if(lst.isEmpty()){ continue; } System.out.println("-----第"+index+"个聚类-----"); for(Iterator<Point> it1=lst.iterator();it1.hasNext();){ Point p=it1.next(); System.out.println(p.print()); } index++; } } 85: } 86: 

  1: package com.sunzhenxing;

  2: 

  3: import java.io.IOException;

  4: import java.util.*;

  5: 

  6: public class Dbscan {

  7:   private final static int e=2;//ε半径

  8:   private final static int minp=4;//密度阈值

  9:   private static List<Point> pointsList=new ArrayList<Point>();//存储原始样本点

 10:   private static List<List<Point>> resultList=new ArrayList<List<Point>>();//存储最后的聚类结果

 11:   

 12:   private static void applyDbscan() throws IOException{

 13:     pointsList=Utility.getPointsList();

 14:     for(int index=0;index<pointsList.size();++index){

 15:       List<Point> tmpLst=new ArrayList<Point>();

 16:       Point p=pointsList.get(index);

 17:       if(p.isClassed())

 18:         continue;

 19:       tmpLst=Utility.isKeyPoint(pointsList, p, e, minp);

 20:       if(tmpLst!=null){

 21:         resultList.add(tmpLst);

 22:       }

 23:     }

 24:     int length=resultList.size();

 25:     for(int i=0;i<length;++i){

 26:       for(int j=0;j<length;++j){

 27:         if(i!=j){

 28:           if(Utility.mergeList(resultList.get(i), resultList.get(j))){

 29:             resultList.get(j).clear();

 30:           }

 31:         }

 32:       }

 33:     }

 34:   }

 35:   public static void main(String[] args) {

 36:     try {

 37:       //调用DBSCAN的实现算法

 38:       applyDbscan();

 39:       Utility.display(resultList);

 40:     } catch (IOException e) {

 41:       // TODO Auto-generated catch block

 42:       e.printStackTrace();

 43:     }

 44:     

 45:   }

 46: 

 47: }

 48: 
下边是一个小测试，即使用src\com\sunzhenxing\points.txt文件的内容进行测试，points.txt的文件内容是：
0,0
0,1
0,2
0,3
0,4
0,5
12,1
12,2
12,3
12,4
12,5
12,6
0,6
0,7
12,7
0,8
0,9
1,1
最后算法的结果是：
-----第1个聚类-----
<0,0>
<0,1>
<0,2>
<1,1>
<0,3>
<0,4>
<0,5>
<0,6>
<0,7>
<0,8>
<0,9>
-----第2个聚类-----
<12,1>
<12,2>
<12,3>
<12,4>
<12,5>
<12,6>
<12,7>
大家画一下坐标就可以理解实验的结论了。

【上篇】ASP.NET MVC 4.0 随笔记录
【下篇】激励的话

作者: weet

该日志由 weet 于12年前发表在综合分类下，最后更新于 2012年06月23日.
转载请注明: DBSCAN算法的java实现 | 学步园 +复制链接

抱歉!评论已关闭.

学步园

DBSCAN算法的java实现

作者: weet

书签

最新文章New

本站推荐

返回首页