
Simple Usage of the HDFS API (1)

package cn.framelife.hadoop;

import java.io.IOException;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileStatus;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;

/**
 * Upload a local file to HDFS
 * @author 潘广伟
 *
 */
public class CopyFile {
	public static void main(String[] args) {
		
		Configuration configuration = new Configuration();
		
		//If HDFS runs on a remote cluster, the following two configuration files are required
		configuration.addResource(new Path("core-site.xml"));
		configuration.addResource(new Path("hdfs-site.xml"));
		
		try {
			//Get the FileSystem object for HDFS from the configuration
			FileSystem hdfs = FileSystem.get(configuration);
			
			//Source file on the local filesystem
			Path src = new Path("/home/benben/abc");
			
			//Destination path on the HDFS server
			Path dst = new Path("/user/");
			
			hdfs.copyFromLocalFile(src, dst);
			System.out.println("upload to"+configuration.get("fs.default.name"));
			
			//List all files under the destination directory on HDFS
			FileStatus[] files = hdfs.listStatus(dst);
			for (FileStatus file : files) {
				System.out.println(file.getPath());
			}
		} catch (IOException e) {
			e.printStackTrace();
		}
		
	}
}
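
A side note on the upload call: FileSystem also provides overloads of copyFromLocalFile that make the delete-source and overwrite behavior explicit, which helps when re-running the upload. A minimal sketch reusing the hdfs, src and dst variables from the try block above:

//delSrc = false keeps the local copy; overwrite = true replaces an existing file at dst
hdfs.copyFromLocalFile(false, true, src, dst);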

package cn.framelife.hadoop;

import java.io.IOException;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FSDataOutputStream;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;

/**
 * Create a file in HDFS
 * 
 * @author 潘广伟
 * 
 */
public class CreateFile {

	public static void main(String[] args) {
		Configuration configuration = new Configuration();
		configuration.addResource(new Path("core-site.xml"));
		configuration.addResource(new Path("hdfs-site.xml"));

		try {
			FileSystem hdfs = FileSystem.get(configuration);

			byte[] buffer = "Hello Hadoop".getBytes();

			Path newPath = new Path("/user/hello1.txt");

			FSDataOutputStream outputStream = hdfs.create(newPath);

			outputStream.write(buffer, 0, buffer.length);
			
			//Close the stream so the data is flushed to HDFS
			outputStream.close();

		} catch (IOException e) {
			e.printStackTrace();
		}
	}
}
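
To confirm the write, the new file can be read back through hdfs.open, which returns an FSDataInputStream. A minimal sketch continuing the try block above (it additionally needs imports for org.apache.hadoop.fs.FSDataInputStream and org.apache.hadoop.io.IOUtils):

//Read the file back and stream its contents to stdout
FSDataInputStream inputStream = hdfs.open(newPath);
//4096 is the copy buffer size; false means copyBytes does not close the streams
IOUtils.copyBytes(inputStream, System.out, 4096, false);
inputStream.close();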

package cn.framelife.hadoop;

import java.io.IOException;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;

/**
 * Delete a file from HDFS
 * @author 潘广伟
 *
 */
public class DeleteFile {

	public static void main(String[] args) {
		Configuration configuration = new Configuration();
		configuration.addResource(new Path("core-site.xml"));
		configuration.addResource(new Path("hdfs-site.xml"));
		
		try {
			FileSystem hdfs = FileSystem.get(configuration);
			
			Path pathOfHDFS = new Path("/user/hello1.txt");
			
			//Check whether the file exists first
			if(hdfs.exists(pathOfHDFS)){
				/*
				 * If the path is a directory, recursive must be true or an exception is thrown.
				 * For a single file, recursive can be either true or false.
				 */
				hdfs.delete(pathOfHDFS, false);
			}
			
		} catch (IOException e) {
			e.printStackTrace();
		}
	}

}
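
As the comment in the code notes, deleting a directory requires recursive to be true; with false, delete throws an IOException on a non-empty directory. A one-line sketch with a hypothetical directory path:

//Deletes /user/tmp and everything beneath it
hdfs.delete(new Path("/user/tmp"), true);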

package cn.framelife.hadoop;

import java.io.IOException;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;

public class Rename {

	/**
	 * Rename a file in HDFS
	 * @author 潘广伟
	 */
	public static void main(String[] args) {
		Configuration configuration = new Configuration();
		configuration.addResource(new Path("core-site.xml"));
		configuration.addResource(new Path("hdfs-site.xml"));
		
		try {
			FileSystem hdfs = FileSystem.get(configuration);
			
			Path pathOfHDFS = new Path("/user/abc.txt");
			Path newPathName = new Path("/user/abcd.txt");
			
			//rename returns true on success
			boolean isRename = hdfs.rename(pathOfHDFS, newPathName);
			
			System.out.println(isRename);
			
		} catch (IOException e) {
			e.printStackTrace();
		}
	}

}
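
Because rename takes two arbitrary HDFS paths, the same call also serves as a move between directories. A sketch with a hypothetical target directory (rename does not create missing parents, so /user/backup must already exist):

//Move /user/abcd.txt into the existing directory /user/backup
boolean isMoved = hdfs.rename(new Path("/user/abcd.txt"), new Path("/user/backup/abcd.txt"));
System.out.println(isMoved);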

package cn.framelife.hadoop;

import java.io.IOException;
import java.util.Arrays;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.BlockLocation;
import org.apache.hadoop.fs.FileStatus;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;

/**
 * Find the locations of a file's blocks in the HDFS cluster
 * @author 潘广伟
 *
 */
public class WhereIsFile {

	public static void main(String[] args) {
		Configuration configuration = new Configuration();
		configuration.addResource(new Path("core-site.xml"));
		configuration.addResource(new Path("hdfs-site.xml"));
		
		try {
			FileSystem hdfs = FileSystem.get(configuration);
			
			Path pathOfHDFS = new Path("/user/hello.txt");
			
			FileStatus fileStatus = hdfs.getFileStatus(pathOfHDFS);
			BlockLocation[] blockLocations = hdfs.getFileBlockLocations(fileStatus, 0, fileStatus.getLen());
			for (BlockLocation block : blockLocations) {
				System.out.println(Arrays.toString(block.getHosts()) + "\t" + Arrays.toString(block.getNames()));
			}
		} catch (IOException e) {
			e.printStackTrace();
		}
	}

}
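
Each BlockLocation also exposes the byte range it covers within the file, which is what allows a byte range of the file to be mapped to specific DataNodes. The loop above could be extended like this, using the same blockLocations variable:

for (BlockLocation block : blockLocations) {
	//Byte range of this block within the file, plus the hosts holding its replicas
	System.out.println("offset=" + block.getOffset()
			+ " length=" + block.getLength()
			+ " hosts=" + Arrays.toString(block.getHosts()));
}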

package cn.framelife.hadoop;

import java.io.IOException;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileStatus;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;

public class GetLastTime {

	/**
	 * Get the last modification time of an HDFS file
	 * @author 潘广伟
	 */
	public static void main(String[] args) {
		Configuration configuration = new Configuration();
		configuration.addResource(new Path("core-site.xml"));
		configuration.addResource(new Path("hdfs-site.xml"));
		
		try {
			FileSystem hdfs = FileSystem.get(configuration);
			
			Path pathOfHDFS = new Path("/user/abcd.txt");
			
			FileStatus file = hdfs.getFileStatus(pathOfHDFS);
			long time = file.getModificationTime();
			
			System.out.println(time);
		} catch (IOException e) {
			e.printStackTrace();
		}
	}

}
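
getModificationTime returns milliseconds since the epoch, so the number printed above is not human-readable. It can be formatted with the standard JDK classes (add imports for java.text.SimpleDateFormat and java.util.Date):

//Convert the epoch milliseconds into a readable timestamp
SimpleDateFormat format = new SimpleDateFormat("yyyy-MM-dd HH:mm:ss");
System.out.println(format.format(new Date(time)));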

package cn.framelife.hadoop;

import java.io.IOException;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileStatus;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.hdfs.DistributedFileSystem;
import org.apache.hadoop.hdfs.protocol.DatanodeInfo;

/**
 * Get the names of all DataNodes in the HDFS cluster
 * 
 * @author 潘广伟
 * 
 */
public class GetDataNodeName {
	public static void main(String[] args) {

		Configuration configuration = new Configuration();
		configuration.addResource(new Path("core-site.xml"));
		configuration.addResource(new Path("hdfs-site.xml"));

		DistributedFileSystem hdfs;
		try {
			hdfs = (DistributedFileSystem) FileSystem.get(configuration);
			DatanodeInfo[] dataNodeStats = hdfs.getDataNodeStats();

			for (DatanodeInfo dataNode : dataNodeStats) {
				System.out.println(dataNode.getHostName() + "\t" + dataNode.getName());
			}

		} catch (IOException e) {
			e.printStackTrace();
		}

	}
}
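
One caveat: the cast to DistributedFileSystem only succeeds when fs.default.name points at an hdfs:// URI; run against the default local filesystem, it throws a ClassCastException. A defensive variant of the same lookup:

FileSystem fs = FileSystem.get(configuration);
if (fs instanceof DistributedFileSystem) {
	//Safe to cast; query the live DataNode list from the NameNode
	DatanodeInfo[] stats = ((DistributedFileSystem) fs).getDataNodeStats();
	for (DatanodeInfo dataNode : stats) {
		System.out.println(dataNode.getHostName());
	}
} else {
	System.err.println("Not an HDFS filesystem: " + fs.getUri());
}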