基于hash表的文件字符串替换

现在的位置: 首页 > 综合 > 正文

基于hash表的文件字符串替换

2013年10月13日 ⁄ 综合 ⁄ 共 2437字 ⁄ 字号小中大 ⁄ 评论关闭

最近一直在研究百度之星的题目。刚好碰到初赛第三题，查阅了网上各牛人的解题方法，感觉收获颇多，对其中的hash的方法自己也有所理解，因此顺着自己的思路，也完成了基于hash表的该题的解法。与大家一同分享

第三题（共四题 100 分）：字符串替换（ 30 分）

题目描述：请编写程序，根据指定的对应关系，把一个文本中的字符串替换成另外的字符串。

输入数据：程序读入已被命名为 text.txt 和 dict.txt 的两个输入数据文本文件， text.txt 为一个包含大量字符串（含中文）的文本，以 whitespace 为分隔符； dict.txt 为表示字符串（ s1 ）与字符串（ s2 ）的对应关系的另一个文本（含中文），大约在 1 万行左右，每行两个字符串（即 s1 和 s2 ），用一个 \t 或空格分隔。 dict.txt 中各行的 s1 没有排序，并有可能有重复，这时以最后出现的那次 s1 所对应的 s2 为准。 text.txt 和 dict.txt 中的每个字符串都可能包含除 whitespace 之外的任何字符。 text.txt 中的字符串必须和 dict.txt 中的某 s1 完全匹配才能被替换。（为便于调试，您可下载测试 text.txt 和 dict.txt 文件，实际运行时我们会使用不同内容的输入文件。）

输出数据：在标准输出上打印 text.txt 被 dict.txt 替换后了的整个文本。

评分标准：程序输出结果必须正确，内存使用越少越好，程序的执行时间越快越好。

#include <stdio.h>
#include <stdlib.h>
#include <string.h>

#define MAX_HASH_SIZE 20000
#define MAX_LINE_SIZE 128*1024
struct ThashPoint
{
	char *s1; //dict.txt 第一个字符串
	char *s2; //dict.txt 第二个字符串
	struct ThashPoint *next;	//根据相应的hash值确定hash链表
};

struct ThashPoint *Hash[MAX_HASH_SIZE];//建立hash表
char line[MAX_LINE_SIZE];
char lines[MAX_LINE_SIZE];
FILE *f;
char *s1; //用于读取dict.txt 中第一个字符串
char *s2; //用于读取dict.txt 中第二个字符串

int read_line(void)
{
	int i = 0;
	fgets(lines,MAX_LINE_SIZE,f)	;
	if(strlen(lines)>0&&lines[strlen(lines)-1]=='\n')
		lines[strlen(lines)-1]= '\0';
	if(strlen(lines)<3)
		return 0;
#if 1
	for(i = 0;lines[i]!= '\0';i++ )
	{
		if(lines[i] == ' '||lines[i]== '\t')
		{
			s1 = lines;
			s2 = lines+i+1;
			lines[i] = '\0';
			return 1;
		}	
		
	}
#endif
	return 0;
}
int hash_count(char *s)
{
	int address = sizeof(s)%MAX_HASH_SIZE;
	int i=0;
	for(i=0;s[i];i++)
	address = (address*100*s[i]+128)%MAX_HASH_SIZE;
	return address;
}
void insert_hash_table(void)
{
 int address = hash_count(s1);
 struct ThashPoint *p;
 for(p = Hash[address];p;p=p->next)
 {
 		if(strcmp(p->s1,s1)==0)
 		{
 			p->s2= (char *)	malloc(sizeof(char)*(strlen(s2)+1));
 			strcpy(p->s2,s2);
 		}	
 }	
 p = (ThashPoint *)malloc(sizeof(ThashPoint));
 p->s1 = (char *)malloc(sizeof(char)*(strlen(s1)+1));
 strcpy(p->s1,s1);
 p->s2 = (char *)malloc(sizeof(char)*(strlen(s2)+1));
 strcpy(p->s2,s2);
 p->next = Hash[address];
 Hash[address] = p;
}
void init_hash_fuc(void)
{
	f = fopen("dict.txt","r");
	while(read_line())
	{
		insert_hash_table();	
	}	
	fclose(f);
}
void Print(char *s)
{
	int address;
	struct ThashPoint *p;
	address = hash_count(s);
	for(p=Hash[address];p;p=p->next)
	{
		if(!strcmp(p->s1,s))
		{
			printf("%s",p->s2);
		return;
		}
	}
	printf("%s",s);
	
}
int main()
{
	init_hash_fuc();//初始化hash表 主要是读取dict.txt文件
	FILE *fp;
	fp = fopen("test.txt","r");
	
	char c;
	int i=0;
	
	while((c = fgetc(fp))!=EOF) //读取test.txt文件 然后取出字符串，进行对比
	{
			if(c == ' '||c == '\t'||c == '\n')
			{
				line[i] = '\0';
				Print(line);
				printf("%c",c);
				i = 0;
				continue;
			}
			line[i++] = c;
	}
	line[i] = '\0';
	Print(line);
	fclose(fp);
	return 0;
		
}

欢迎大家一起来交流哈~

【上篇】疯狂JAVA讲义—第八章：泛型
【下篇】poj 3026 Borg Maze (bfs+最小生成树)

作者: beater

该日志由 beater 于11年前发表在综合分类下，最后更新于 2013年10月13日.
转载请注明: 基于hash表的文件字符串替换 | 学步园 +复制链接

抱歉!评论已关闭.

学步园

基于hash表的文件字符串替换

作者: beater

书签

最新文章New

本站推荐

返回首页