现在的位置: 首页 > 综合 > 正文

Linux编程实践——head和tail的实现

2012年02月23日 ⁄ 综合 ⁄ 共 6219字 ⁄ 字号 评论关闭

head和tail

它们看起来跟cat好像是近亲,做的事情差不多,都是读取文件内容:head从文件头读起,tail则从文件尾读取,默认读取10行并打印。这里两个命令都只实现了默认打印10行和用参数-n指定打印行数这两种用法。

实现方法

这里除了使用常用的文件操作接口open、close、read外,在tail的实现里还使用了lseek,在线手册说明如下:

NAME
lseek - reposition read/write file offset

SYNOPSIS
#include <sys/types.h>
#include <unistd.h>

off_t lseek(int fd, off_t offset, int whence);

DESCRIPTION
The lseek() function repositions the offset of the open file associated with the
file descriptor fd to the argument offset according to the directive whence as
follows:

SEEK_SET
The offset is set to offset bytes.

SEEK_CUR
The offset is set to its current location plus offset bytes.

SEEK_END
The offset is set to the size of the file plus offset bytes.

The lseek() function allows the file offset to be set beyond the end of the file
(but this does not change the size of the file). If data is later written at
this point, subsequent reads of the data in the gap (a "hole") return null bytes
('\0') until data is actually written into the gap.

RETURN VALUE
Upon successful completion, lseek() returns the resulting offset location as
measured in bytes from the beginning of the file. Otherwise, a value of
(off_t) -1 is returned and errno is set to indicate the error.
ERRORS
EBADF fd is not an open file descriptor.

EINVAL whence is not one of SEEK_SET, SEEK_CUR, SEEK_END; or the resulting file
offset would be negative, or beyond the end of a seekable device.

EOVERFLOW
The resulting file offset cannot be represented in an off_t.

ESPIPE fd is associated with a pipe, socket, or FIFO.

CONFORMING TO
SVr4, 4.3BSD, POSIX.1-2001.

NOTES
This document's use of whence is incorrect English, but maintained for histori‐
cal reasons.

Some devices are incapable of seeking and POSIX does not specify which devices
must support lseek().

On Linux, using lseek() on a tty device returns ESPIPE.

When converting old code, substitute values for whence with the following
macros:
old new
0 SEEK_SET

1 SEEK_CUR
2 SEEK_END
L_SET SEEK_SET
L_INCR SEEK_CUR
L_XTND SEEK_END

SVr1-3 returns long instead of off_t, BSD returns int.

Note that file descriptors created by dup(2) or fork(2) share the current file
position pointer, so seeking on such files may be subject to race conditions.

SEE ALSO
dup(2), fork(2), open(2), fseek(3), lseek64(3), posix_fallocate(3)

COLOPHON
This page is part of release 3.24 of the Linux man-pages project. A description
of the project, and information about reporting bugs, can be found at
http://www.kernel.org/doc/man-pages/.

Linux 2001-09-24 LSEEK(2)

它涉及到一个神秘的只有内核可以操作的当前指针,在这里有一点讨论可以参考。

实现head

设一个换行符('\n')计数器,使用缓冲区分块读取文件,逐字节分析读到的内容并累加换行计数器,累计到需要显示的行数时跳出。

hhead.c

#include <stdio.h>
#include <string.h>
#include <getopt.h>
#include <unistd.h>
#include <fcntl.h>
#include <stdlib.h>

#define READSIZE 4096 // size in bytes of the read buffer and of each read() request


/*
 * hhead: print the first N lines (default 10) of a file to standard output.
 *
 * Usage: hhead [-n lines] file
 *
 * Returns 0 on success, -1 on a usage error or when the file cannot be
 * opened, read, written out or closed.
 */
int main(int argc, char* argv[]){
    int rfd = -1;           /* descriptor of the input file */
    int c;                  /* getopt() result: must be int so that -1 is representable */
    int ret = 0;            /* exit status */
    long nline = 10;        /* number of lines to print (default 10) */
    long nent = 0;          /* '\n' characters seen so far */
    ssize_t rlen = 0;       /* bytes returned by the last read() */
    char rbuf[READSIZE];    /* read buffer */
    char *end;              /* first character strtol() did not consume */
    const char *filename;

    /* Parse the command line. */
    while((c = getopt(argc,argv,"n:")) != -1){
        switch(c){
        case 'n':   /* explicit line count */
            nline = strtol(optarg,&end,10);
            if(end == optarg || *end != '\0' || nline < 0){
                fprintf(stderr,"%s: invalid number of lines: '%s'\n",argv[0],optarg);
                return -1;
            }
            break;
        default:
            fprintf(stderr,"Usage: %s [-n lines] file\n",argv[0]);
            return -1;
        }
    }
    if(optind >= argc){     /* no file operand: argv[optind] would be NULL */
        fprintf(stderr,"Usage: %s [-n lines] file\n",argv[0]);
        return -1;
    }
    filename = argv[optind];
    if((rfd = open(filename,O_RDONLY)) == -1){
        perror(filename);
        return -1;
    }

    /*
     * Copy the file chunk by chunk until nline newlines have been written.
     * The data is emitted with fwrite() and an explicit length because the
     * buffer is not NUL-terminated when read() fills it completely, and the
     * file may itself contain NUL bytes.
     */
    while(nent < nline && (rlen = read(rfd,rbuf,READSIZE)) > 0){
        ssize_t keep = rlen;    /* bytes of this chunk that belong to the output */
        ssize_t i;

        for(i = 0; i < rlen; i++){
            if(rbuf[i] == '\n' && ++nent == nline){
                keep = i + 1;   /* include the newline that ends the last line */
                break;
            }
        }
        if(fwrite(rbuf,1,(size_t)keep,stdout) != (size_t)keep){
            perror("stdout");
            ret = -1;
            break;
        }
    }
    if(rlen < 0){
        perror(filename);
        ret = -1;
    }
    if(close(rfd) == -1){
        perror(filename);
        ret = -1;
    }
    return ret;
}

实现tail

tail要复杂一点,我们取距文件尾的n行,取得第n行距离文件尾的位置即可。所以我们仍需要一换行计数器,还需要一个距文件尾的偏移量累加量。下面函数实现此功能,该函数结果返回的就是距文件尾的偏移量:

 1 int  locate(int fd,int nline,char* rbuf){ // returns offs2end, the accumulated distance from the end of the file (-1 if fstat fails)
2 off_t offs_seek;
3 int len=0,
4 l=1, // multiplier applied to off on each lseek; incremented every iteration
5 i=0,
6 nent=0, // newline counter
7 offs2end=0, // offset measured from the end of the file
8 off=0,
9 whence=SEEK_END;
10
11 char *pb;// points at the most recently found newline
12 struct stat pstat; // file status
13 if(fstat(fd,&pstat) < 0){
14 perror("ttail");
15 return -1;
16 }
17 // handle the case where the buffer is at least as large as the file
18 if(pstat.st_size <= READSIZE){
19 off = 0;
20 whence = SEEK_CUR;
21 }
22 else{
23 off = 0-READSIZE;
24 whence = SEEK_END;
25 }
26
27 while((offs_seek=lseek(fd,l*off,whence)) != -1)
28 {
29 l++;
30 if((len = read(fd,rbuf,READSIZE)) > 0)
31 {
32 for(i=len-1; i >=0; i--){ // scan the chunk backwards
33 if(rbuf[i] == '\n'){
34 pb = &rbuf[i];
35 if(++nent == nline+1) // stop at the (nline+1)-th newline counted from the end
36 break;
37 }
38 }
39 // accumulate the offset from the end of the file
40 if(nent != nline+1){
41 offs2end += len;
42 }
43 else {
44 offs2end += (&rbuf[len-1]-pb); // only the bytes after that newline count
45 break;
46 }
47 memset(rbuf,0,READSIZE);
48 }
49 }
50 if(offs_seek == -1)
51 printerr(errno);
52 return offs2end;
53 }

定位过程:定义距文件尾的偏移量offs2end,定义换行计数器nent,用lseek直接从文件尾定位,读取一缓冲区的数据,根据要取的行数,做内容分析、nent(换行计数器)累加和offs2end(距文件尾的偏移量)累计,一直到nent累计到需要的值,则offs2end累加计算也结束了,返回它。

全部代码:

ttail

#include <errno.h>
#include <fcntl.h>
#include <getopt.h>
#include <limits.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <sys/stat.h>
#include <sys/types.h>
#include <unistd.h>

#define READSIZE 4096 // size in bytes of the read buffer and of each read() request

int locate(int fd,int nline,char* rbuf); // compute how far from the end of the file the lines to print begin (fd: file descriptor, nline: number of lines to print, rbuf: content buffer)
int printd(int fd,off_t offs2end,char* buf);// print the content (fd: file descriptor, offs2end: offset relative to the end of the file, buf: content buffer)
void printerr(int); // print an error message

/*
 * ttail: print the last N lines (default 10) of a file to standard output.
 *
 * Usage: ttail [-n lines] file
 *
 * Returns 0 on success, -1 on a usage error or when the file cannot be
 * opened, located in, or closed.
 */
int main(int argc ,char* argv[])
{
    int rfd = -1;           /* descriptor of the input file */
    int ret = 0;            /* exit status */
    int nline = 10;         /* number of lines to print (default 10) */
    int c;                  /* getopt() result: must be int so that -1 is representable */
    int offs;               /* distance from the end of the file, as returned by locate() */
    long val;
    char *end;              /* first character strtol() did not consume */
    char rbuf[READSIZE];    /* buffer shared by locate() and printd() */

    /* Parse the command line. */
    while((c = getopt(argc,argv,"n:")) != -1){
        switch(c){
        case 'n':   /* explicit line count */
            val = strtol(optarg,&end,10);
            /* Stay below INT_MAX so a callee may safely compute nline+1. */
            if(end == optarg || *end != '\0' || val < 0 || val >= INT_MAX){
                fprintf(stderr,"%s: invalid number of lines: '%s'\n",argv[0],optarg);
                return -1;
            }
            nline = (int)val;
            break;
        default:
            fprintf(stderr,"Usage: %s [-n lines] file\n",argv[0]);
            return -1;
        }
    }
    if(optind >= argc){     /* no file operand: argv[optind] would be NULL */
        fprintf(stderr,"Usage: %s [-n lines] file\n",argv[0]);
        return -1;
    }

    /* Open the file. */
    if((rfd = open(argv[optind],O_RDONLY)) == -1){
        perror(argv[optind]);
        return -1;
    }

    /*
     * A negative result from locate() signals an error; negating it would
     * produce a positive offset past the end of the file, so skip printing.
     */
    offs = locate(rfd,nline,rbuf);
    if(offs < 0)
        ret = -1;
    else
        (void)printd(rfd,-(off_t)offs,rbuf);    /* output is best effort; status not used */

    /* Close the file and return. */
    if(close(rfd) == -1){
        perror(argv[optind]);
        ret = -1;
    }
    return ret;
}
/* Convert a byte count to the int that locate() returns; -1 if it does not fit. */
static int offs_as_int(off_t v){
    if(v > (off_t)INT_MAX){
        fprintf(stderr,"ttail: offset too large\n");
        return -1;
    }
    return (int)v;
}

/*
 * Find where the last nline lines of the file begin.
 *
 * fd:    open, seekable file descriptor
 * nline: number of lines wanted
 * rbuf:  scratch buffer of at least READSIZE bytes
 *
 * Returns the number of bytes between the start of those lines and the end
 * of the file (the whole file size when it holds fewer than nline lines,
 * 0 when nline <= 0 or the file is empty), or -1 on error.
 *
 * The file is scanned backwards in chunks of at most READSIZE bytes using
 * absolute offsets, so the partial chunk at the start of the file is scanned
 * too and the loop always ends once offset 0 has been processed.
 */
int locate(int fd,int nline,char* rbuf){
    struct stat pstat;      /* file status, used for the file size */
    off_t size;             /* total file size */
    off_t pos;              /* file offset of the chunk being scanned */
    int nent = 0;           /* line-start newlines found so far */

    if(fstat(fd,&pstat) < 0){
        perror("ttail");
        return -1;
    }
    size = pstat.st_size;
    if(nline <= 0 || size <= 0)
        return 0;

    pos = size;
    while(pos > 0){
        off_t chunk = (pos < (off_t)READSIZE) ? pos : (off_t)READSIZE;
        off_t got = 0;
        off_t i;

        pos -= chunk;
        if(lseek(fd,pos,SEEK_SET) == (off_t)-1){
            printerr(errno);
            return -1;
        }
        /* read() may return fewer bytes than requested; collect the whole chunk. */
        while(got < chunk){
            ssize_t n = read(fd,rbuf + got,(size_t)(chunk - got));
            if(n < 0){
                perror("ttail");
                return -1;
            }
            if(n == 0)
                break;
            got += n;
        }
        if(got != chunk){
            fprintf(stderr,"ttail: file changed while reading\n");
            return -1;
        }

        for(i = chunk - 1; i >= 0; i--){
            if(rbuf[i] != '\n')
                continue;
            /* A newline that is the very last byte only terminates the
               final line; it does not introduce another one. */
            if(pos + i == size - 1)
                continue;
            /* This newline precedes the first of the last nline lines. */
            if(++nent == nline)
                return offs_as_int(size - (pos + i + 1));
        }
    }
    /* Fewer than nline lines in the file: everything is printed. */
    return offs_as_int(size);
}

/*
 * Copy the tail of a file to standard output.
 *
 * fd:       open, seekable file descriptor
 * offs2end: offset relative to the end of the file at which output starts
 *           (zero or negative to start inside the file)
 * buf:      scratch buffer of at least READSIZE bytes
 *
 * Returns 0 on success, -1 if seeking, reading or writing fails.
 */
int printd(int fd,off_t offs2end,char* buf){
    ssize_t len;

    if(lseek(fd,offs2end,SEEK_END) == (off_t)-1)
        return -1;
    /* Write exactly the bytes read: the buffer is not NUL-terminated when
       read() fills it, and the data may contain NUL bytes. */
    while((len = read(fd,buf,READSIZE)) > 0){
        if(fwrite(buf,1,(size_t)len,stdout) != (size_t)len)
            return -1;
    }
    if(len < 0)
        return -1;
    return (fflush(stdout) == EOF) ? -1 : 0;
}

/*
 * Print a one-line description of an lseek() failure to standard error.
 *
 * err: errno value to describe. EBADF, EINVAL, EOVERFLOW and ESPIPE get the
 *      wording of the lseek(2) manual page; any other value falls back to
 *      strerror().
 */
void printerr(int err){
    const char *msg;

    switch(err){
    case EBADF:
        msg = "fd is not an open file descriptor.";
        break;
    case EINVAL:
        /* Adjacent literals instead of a backslash continuation, which
           would glue "the" and "resulting" together. */
        msg = "whence is not one of SEEK_SET, SEEK_CUR, SEEK_END; or the "
              "resulting file offset would be negative, or beyond the end of a seekable device.";
        break;
    case EOVERFLOW:
        msg = "The resulting file offset cannot be represented in an off_t.";
        break;
    case ESPIPE:
        msg = "fd is associated with a pipe, socket, or FIFO.";
        break;
    default:
        msg = strerror(err);
        break;
    }
    fprintf(stderr,"%s\n",msg);
}

【上篇】
【下篇】

抱歉!评论已关闭.