Haskell的list类型应用很广泛,但不是做所有事情都适合用。List类型从头部取一个元素,或从头部插入一个元素的时间复杂度是O(1),如果随机访问任意位置的元素,则时间复杂度为O(n)。List类型如果求它的长度用length函数,则时间复杂度为O(n),所以如果判断一个List对象是否为空,最好用null函数,而不是判断length等于0。
Haskell在读写文件的时候,尤其是读写大文件的时候,最好不要采用List数据类型(例如String,即[Char])。Haskell提供的Data.ByteString类型,可以用来比较高效地读写大文件。
下面是一个应用Data.ByteString来分割合并文件的例子:
{-
Entry point. Dispatches on the command-line arguments:

  <divide-file-name> <number-of-parts>    split the file into parts
  -m <parts-file-name> <out-file-name>    merge the parts back together

Anything else (including a part count that is not a positive integer)
prints the usage text.
-}
main :: IO ()
main = do
  progName <- getProgName
  args <- getArgs
  case args of
    -- three args starting with -m, merge file
    ["-m", partFileName, outFileName] ->
      mergeFile partFileName outFileName
    -- two args, divide file
    [divideFileName, countArg] ->
      -- 'reads' instead of the partial 'read': a malformed count must not
      -- crash the program. Surrounding whitespace is tolerated, as with 'read'.
      case reads countArg :: [(Integer, String)] of
        [(numberOfPart, rest)]
          | null (words rest) && numberOfPart > 0 ->
              divideFile divideFileName numberOfPart
        _ -> printUsage progName
    -- otherwise print usage
    _ -> printUsage progName
{-
Merge the part files listed in a parts file into a single output file.

partFileName is the parts file generated when dividing a file;
outFileName is the file name for the merged file.

The output file is emptied before the parts are appended, so merging into
an already existing file does not keep its old content in front of the
merged data. When the parts file lists no parts, nothing is written.
-}
mergeFile :: FilePath -> FilePath -> IO ()
mergeFile partFileName outFileName = do
  names <- partFileNames partFileName
  case names of
    [] -> return ()
    _ -> do
      -- mergeFile' only ever appends, so start from an empty file.
      BS.writeFile outFileName BS.empty
      mergeFile' names outFileName
{-
Append the content of every listed file, in order, to outFileName.

The file names are obtained from the parts file which is generated when
dividing a file. An empty list does nothing.
-}
mergeFile' [] _ = return ()
mergeFile' (fileName : remaining) outFileName = do
  BS.readFile fileName >>= BS.appendFile outFileName
  mergeFile' remaining outFileName
{-
Open the parts file, collect the part file names it lists (one per line,
see parseFileName), close the handle and return the names.
-}
partFileNames :: FilePath ->IO [FilePath]
partFileNames partFileName =
  openFile partFileName ReadMode >>= \h ->
    parseFileName h >>= \names ->
      hClose h >> return names
{-
Read the handle line by line and turn each line into a file name with
line2FileName. Reading stops as soon as an IOError is raised; the handler
eofEncuntered then supplies the empty tail of the list.
-}
parseFileName::Handle->IO [FilePath]
parseFileName fileHandle = handle eofEncuntered readRemaining
  where
    -- one line now, the rest of the handle recursively
    readRemaining =
      hGetLine fileHandle >>= \line ->
        fmap (line2FileName line :) (parseFileName fileHandle)
{-
Extract the file name from one line of a parts file.

The second whitespace-separated word in the line is the file name.
A line with fewer than two words is malformed and raises an error that
names the offending line (instead of an anonymous index error).
-}
line2FileName::String->String
line2FileName s =
  case words s of
    (_ : name : _) -> name
    _ -> error ("line2FileName: malformed parts line: " ++ show s)
{-
IOError handler used while reading the parts file: whatever the error is,
it is discarded and an empty list of file names is returned.
-}
eofEncuntered::IOError ->IO [FilePath]
eofEncuntered = const (return [])
{-
Print the two supported command lines, each prefixed with the program name.
-}
printUsage progName =
  mapM_ (putStrLn . (progName ++)) usageSuffixes
  where
    usageSuffixes =
      [ " <divide-file-name> <number-of-parts> "
      , " -m <parts-file-name> <out-file-name> "
      ]
{-
Compute the sizes of the parts a file of fileTotalSize bytes is split into.

The sizes always add up to fileTotalSize and differ by at most one byte,
with the larger parts first. Exactly numberOfPart sizes are returned,
except that no empty part is produced: when the file has fewer bytes than
requested parts, one-byte parts are returned instead, and a file of size
zero (or less) yields the single entry [fileTotalSize].

numberOfPart must be positive; otherwise an error is raised (a zero count
would divide by zero and a negative one would never terminate).
-}
filePartList :: Integer -> Integer -> [Integer]
filePartList fileTotalSize numberOfPart
  | numberOfPart <= 0 = error "filePartList: numberOfPart must be positive"
  | fileTotalSize <= 0 = [fileTotalSize]
  | otherwise =
      replicate (fromInteger extra) (eachSize + 1)
        ++ replicate (fromInteger (partCount - extra)) eachSize
  where
    -- never ask for more parts than there are bytes
    partCount = min numberOfPart fileTotalSize
    -- the first 'extra' parts carry one additional byte
    (eachSize, extra) = fileTotalSize `divMod` partCount
{-
Split divideFileName into parts: query the file size, read the content,
compute the part sizes, write every part file and finally write the parts
file that records the index, name and size of each part.
-}
divideFile :: FilePath -> Integer -> IO ()
divideFile divideFileName numberOfPart = do
  totalBytes <- fileSize divideFileName
  bytes <- BS.readFile divideFileName
  let indexedSizes = numbered (filePartList totalBytes numberOfPart)
  saveIndividualFile divideFileName indexedSizes bytes
  saveDivideInfo divideFileName indexedSizes
  where
    -- pair every part size with its zero-based index
    numbered = zip [0 ..]
{-
Write the parts file "<fileName>.parts". Every (index, size) pair becomes
one line of the form "<index> <fileName><index> <size>"; the lines are
joined with strLst2Str.
-}
saveDivideInfo :: FilePath -> [(Integer,Integer)] -> IO ()
saveDivideInfo fileName fileMapList =
  writeFile (fileName ++ ".parts") (strLst2Str (map describePart fileMapList))
  where
    describePart (partIndex, partSize) =
      show partIndex ++ " " ++ fileName ++ show partIndex ++ " " ++ show partSize
{-
Join a list of strings into one string, terminating every element with a
newline character. The empty list gives the empty string.
-}
strLst2Str :: [String] -> String
strLst2Str = unlines
{-
Write the part files. For every (index, size) pair the next `size` bytes of
the content are written to "<fileName><index>", and the remaining bytes are
passed on to the following pairs. An empty list writes nothing.
-}
saveIndividualFile :: FilePath -> [(Integer,Integer)] ->BS.ByteString->IO ()
saveIndividualFile _ [] _ = return ()
saveIndividualFile fileName ((fileIndex, partSize) : rest) content = do
  -- direct numeric conversion; no need to round-trip through a String
  let byteCount = fromInteger partSize :: Int64
  BS.writeFile (fileName ++ show fileIndex) (BS.take byteCount content)
  saveIndividualFile fileName rest (BS.drop byteCount content)
{-
Return the size of the named file as reported by hFileSize. The file is
opened in ReadMode for the query and closed again afterwards.
-}
fileSize :: FilePath -> IO Integer
fileSize fileName =
  openFile fileName ReadMode >>= \h ->
    hFileSize h >>= \byteCount ->
      hClose h >> return byteCount