01 Dos2Unix:
很久以前,老式的电传打字机使用两个字符来另起新行:一个字符把滑动架移回首位 (称为回车, <CR>),另一个字符把纸上移一行 (称为换行, <LF>)。当计算机问世以后,存储器曾经非常昂贵,有些人就认定没必要用两个字符来表示行尾。UNIX 开发者决定只用 <LF> 一个字符来表示行尾;Apple 开发者规定用 <CR>;开发 MS-DOS (以及微软视窗) 的那些家伙则决定沿用老式的 <CR><LF>。这意味着,如果你试图把一个文件从一种系统移到另一种系统,就会遇到换行符方面的麻烦。
“Remove cr ('\x0d') characters from a file.”——这是程序注释中对自身功能的说明。如果你在 Linux 下使用 vi 编辑一个 MS-DOS 格式的文件,会发现每一行的末尾都有一个 ^M 字符 (^M 就是 <CR>)。
\n 匹配一个换行符<LF>,等价于 \x0a 和 \cJ。
\r 匹配一个回车符<CR>,等价于 \x0d 和 \cM。
于是就有了我们今天要看的这个程序。我们不去讲编码的细节,主要看程序所完成的文件操作和一些 I/O 操作。想一想,其实我们的任务就是拿到一个 DOS 格式的文件,把它每一行行尾的回车符去掉就 OK 了。够简单吧 (符合我现在的水平)。思考了一下,程序基本要做的工作可以列出来:
第一步:得到要更改的文件
第二步:顺序读取文件,遇到回车符去掉
第三步:关了文件
这和把大象放到冰箱里的难易度真的很相近。
可惜一个比较成熟的程序要拿出去给别人使用还需要附加很多元素进去,我们今天既要关注程序的功能模块,也要学习人家高手的程序结构。
我们先看一下程序的主要功能模块是如何实现的:
while ((TempChar = getc(ipInF)) != EOF) { //这句话制造遍历整个文件的循环
if (TempChar != '\x0d') { //判断是不是回车符
if (putc(D2UAsciiTable[TempChar], ipOutF) == EOF) { //从一个ASCII表中得到相应字符写入
RetVal = -1; //错误返回
if (!ipFlag->Quiet) //标志字段:是否显示错误
fprintf(stderr, "dos2unix: can not write to out file\n");
break;
}
} else {
if (ipFlag->NewLine) { //标志字段
putc('\n', ipOutF);
}
}
}
下面我们介绍一下程序的结构及一些功能的实现方法:
程序由下面函数组成:
void PrintUsage(void) :显示使用方法
void PrintVersion(void) :显示软件版本
FILE* OpenInFile(char *ipFN):打开输入文件
FILE* OpenOutFile(char *ipFN):打开输出文件
int ConvertDosToUnix(FILE* ipInF, FILE* ipOutF, CFlag *ipFlag):转换文件格式
int ConvertDosToUnixNewFile(char *ipInFN, char *ipOutFN, CFlag *ipFlag):将文件转化到新的文件中
int ConvertDosToUnixOldFile(char* ipInFN, CFlag *ipFlag):在原有文件的基础上转换
int ConvertDosToUnixStdio(CFlag *ipFlag):直接从标准输入 (stdin) 读取,转换后写到标准输出 (stdout)
int main (int argc, char *argv[]) :主函数。
我从这个程序中得到的收获也不少:
1、程序定义了结构体 CFlag,把程序中的很多标志统一起来,便于管理,而不用零散地使用很多标志变量。
2、使用预处理器的条件编译 (如 #ifdef __MSDOS__) 提高了程序的灵活性。
3、程序使用了下面的结构来完成对程序参数的执行:
while ((++ArgIdx < argc) && (!ShouldExit))
{
/* is it an option? */
if (argv[ArgIdx][0] == '-')
{
/* an option */
if ((strcmp(argv[ArgIdx],"-h") == 0) || (strcmp(argv[ArgIdx],"--help") == 0))
今天就先说这么多,明天我们再对 Linux 下的基本 I/O 做个总结吧。大家可以好好看看下面的代码,对学习结构化程序设计很有帮助。
/*
* Added extra newline if ^M occurs
* Christian Wurll, wurll@ira.uka.de
* Thu Nov 19 1998
*
* Added Mac text file translation, i.e. \r to \n conversion
* Bernd Johannes Wuebben, wuebben@kde.org
* Wed Feb 4 19:12:58 EST 1998
*
* Name: dos2unix
* Documentation:
* Remove cr ('\x0d') characters from a file.
* Version: $$Id: dos2unix.c 2.2 1995/03/31 01:40:24 blin Exp blin $$
*
* Copyright (c) 1994, 1995 Benjamin Lin.
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
* 1. Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* 2. Redistributions in binary form must reproduce the above copyright
* notice in the documentation and/or other materials provided with
* the distribution.
*
* THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY
* EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
* PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR BE LIABLE
* FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
* CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT
* OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR
* BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY,
* WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE
* OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN
* IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*
* == 1.0 == 1989.10.04 == John Birchfield (jb@koko.csustan.edu)
* == 1.1 == 1994.12.20 == Benjamin Lin (blin@socs.uts.edu.au)
* Cleaned up for Borland C/C++ 4.02
* == 1.2 == 1995.03.16 == Benjamin Lin (blin@socs.uts.edu.au)
* Modified to more conform to UNIX style.
* == 2.0 == 1995.03.19 == Benjamin Lin (blin@socs.uts.edu.au)
* Rewritten from scratch.
* == 2.1 == 1995.03.29 == Benjamin Lin (blin@socs.uts.edu.au)
* Conversion to SunOS charset implemented.
* == 2.2 == 1995.03.30 == Benjamin Lin (blin@socs.uts.edu.au)
* Fixed a bug in 2.1 where in new file mode, if outfile already exists
* conversion can not be completed properly.
*
* == BUG ==
* stdio process under DOS not working
*/
#define RCS_AUTHOR "$$Author: wurll $$"
#define RCS_DATE "$$Date: Thu Nov 19 1998 $$"
#define RCS_REVISION "$$Revision: 3.1 $$"
#define VER_AUTHOR "Christian Wurll"
#define VER_DATE "Thu Nov 19 1998"
#define VER_REVISION "3.1"
#define MACMODE 1
static int macmode = 0;
/* #define DEBUG */
#ifdef __MSDOS__ /* DOS-only header, selected by conditional compilation */
# include <dir.h>
#endif /* __MSDOS__ */
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <utime.h>
#include <sys/stat.h>
#ifndef __MSDOS__
# include <unistd.h> /* unlink() */
#endif /* !__MSDOS__ */
#include "dos2unix.h"
/* fopen() mode strings used by OpenInFile() and OpenOutFile().
 * DOS builds open files in binary mode ("rb"/"wb"); every other build
 * uses the plain "r"/"w" modes.
 */
#ifdef __MSDOS__
#define R_CNTRL "rb"
#define W_CNTRL "wb"
#else /* !__MSDOS__ */
#define R_CNTRL "r"
#define W_CNTRL "w"
#endif /* __MSDOS__ */
/* Conversion settings filled in by the option parser and read by the
 * conversion routines.  Every member is an int used as a flag, except
 * ConvMode, which selects the translation mode.
 */
typedef struct
{
  int NewFile;   /* non-zero: new file mode (infile/outfile pairs) */
  int Quiet;     /* non-zero: quiet mode, messages are suppressed */
  int KeepDate;  /* non-zero: keep the input file's date stamp */
  int ConvMode;  /* 0 = ASCII, 1 = 7 bit, 2 = ISO, 3 = Mac */
  int NewLine;   /* non-zero: write an additional newline for each CR */
} CFlag;
/* Write the copyright banner and the command line summary to stderr. */
void PrintUsage(void)
{
  /* One entry per output line, emitted in order. */
  static const char *const UsageText[] = {
    "dos2unix Copyright (c) 1994-1995 Benjamin Lin\n",
    " Copyright (c) 1998 Bernd Johannes Wuebben (Version 3.0)\n",
    " Copyright (c) 1998 Christian Wurll (Version 3.1)\n",
    "Usage: dos2unix [-hkqV] [-c convmode] [-o file ...] [-n infile outfile ...]\n",
    " -h --help give this help\n",
    " -k --keepdate keep output file date\n",
    " -q --quiet quiet mode, suppress all warnings\n",
    " always on in stdin->stdout mode\n",
    " -V --version display version number\n",
    " -c --convmode conversion mode\n",
    " convmode ASCII, 7bit, ISO, Mac, default to ASCII\n",
    " -l --newline add additional newline in all but Mac convmode\n",
    " -o --oldfile write to old file\n",
    " file ... files to convert in old file mode\n",
    " -n --newfile write to new file\n",
    " infile original file in new file mode\n",
    " outfile output file in new file mode\n"
  };
  size_t LineIdx;

  for (LineIdx = 0; LineIdx < sizeof UsageText / sizeof UsageText[0]; LineIdx++)
    fputs(UsageText[LineIdx], stderr);
}
/* Print the program version to stderr.
 * When DEBUG is defined, the RCS_* and VER_* macros are dumped as well.
 */
void PrintVersion(void)
{
  fprintf(stderr, "dos2unix %s (%s)\n", VER_REVISION, VER_DATE);
#ifdef DEBUG
  fprintf(stderr, "RCS_AUTHOR: %s\n", RCS_AUTHOR);
  fprintf(stderr, "RCS_DATE: %s\n", RCS_DATE);
  fprintf(stderr, "RCS_REVISION: %s\n", RCS_REVISION);
  fprintf(stderr, "VER_AUTHOR: %s\n", VER_AUTHOR);
  fprintf(stderr, "VER_DATE: %s\n", VER_DATE);
  fprintf(stderr, "VER_REVISION: %s\n", VER_REVISION);
#endif /* DEBUG */
}
/* Open the file named ipFN for reading, using the R_CNTRL mode string.
 * RetVal: the stream returned by fopen()
 *         NULL if the file could not be opened
 */
FILE* OpenInFile(char *ipFN)
{
  FILE *Stream = fopen(ipFN, R_CNTRL);

  return Stream;
}
/* Open the file named ipFN for writing, using the W_CNTRL mode string.
 * RetVal: the stream returned by fopen()
 *         NULL if the file could not be opened
 */
FILE* OpenOutFile(char *ipFN)
{
  FILE *Stream = fopen(ipFN, W_CNTRL);

  return Stream;
}
/* converts stream ipInF to UNIX format text and write to stream ipOutF
* RetVal: 0 if success
* -1 otherwise
*/
int ConvertDosToUnix(FILE* ipInF, FILE* ipOutF, CFlag *ipFlag)
{
int RetVal = 0;
int TempChar;
if ( macmode )
ipFlag->ConvMode = 3;
switch (ipFlag->ConvMode)
{
case 0: /* ASCII */
while ((TempChar = getc(ipInF)) != EOF) {
if (TempChar != '\x0d') {
if (putc(D2UAsciiTable[TempChar], ipOutF) == EOF) {
RetVal = -1;
if (!ipFlag->Quiet)
fprintf(stderr, "dos2unix: can not write to out file\n");
break;
}
} else {
if (ipFlag->NewLine) {
putc('\n', ipOutF);
}
}
}
break;
case 1: /* 7Bit */
while ((TempChar = getc(ipInF)) != EOF) {
if (TempChar != '\x0d') {
if (putc(D2U7BitTable[TempChar], ipOutF) == EOF) {
RetVal = -1;
if (!ipFlag->Quiet)
fprintf(stderr, "dos2unix: can not write to out file\n");
break;
}
} else {
if (ipFlag->NewLine) {
putc('\n', ipOutF);
}
}
}
break;
case 2: /* ISO */
while ((TempChar = getc(ipInF)) != EOF) {
if (TempChar != '\x0d') {
if (putc(D2UIsoTable[TempChar], ipOutF) == EOF) {
RetVal = -1;
if (!ipFlag->Quiet)
fprintf(stderr, "dos2unix: can not write to out file\n");
break;
}
} else {
if (ipFlag->NewLine) {
putc('\n', ipOutF);
}
}
}
break;
case 3: /* Mac */
while ((TempChar = getc(ipInF)) != EOF)
if ((TempChar != '\x0d'))
{
if(putc(D2UAsciiTable[TempChar], ipOutF) == EOF){
RetVal = -1;
if (!ipFlag->Quiet)
fprintf(stderr, "dos2unix: can not write to out file\n");
break;
}
}
else{
if (putc('\x0a', ipOutF) == EOF)
{
RetVal = -1;
if (!ipFlag->Quiet)
fprintf(stderr, "dos2unix: can not write to out file\n");
break;
}
}
break;
default: /* unknown convmode */
;
#ifdef DEBUG
fprintf(stderr, "dos2unix: program error, invalid conversion mode %d\n",ipFlag->ConvMode);
exit(1);
#endif DEBUG
}
return RetVal;
}
/* Convert file ipInFN to UNIX format text and write the result to ipOutFN.
 *
 * The conversion is written to a temporary file in the current directory,
 * which is renamed to ipOutFN once everything succeeded.  When
 * ipFlag->KeepDate is set, the access and modification times of ipInFN
 * are copied to the result.
 *
 * RetVal: 0 if success
 *         -1 otherwise
 */
int ConvertDosToUnixNewFile(char *ipInFN, char *ipOutFN, CFlag *ipFlag)
{
  int RetVal = 0;
  int TempCreated = 0;   /* set once this call has created the temp file */
  FILE *InF = NULL;
  FILE *TempF = NULL;
  char TempPath[16];
  struct stat StatBuf;
  struct utimbuf UTimeBuf;

  /* retrieve ipInFN file date stamp */
  if ((ipFlag->KeepDate) && stat(ipInFN, &StatBuf))
    RetVal = -1;

  /* the template is 14 characters plus the terminator: fits TempPath */
  strcpy (TempPath, "./d2utmp");
  strcat (TempPath, "XXXXXX");
  /* NOTE(review): mktemp() only chooses a name, the file is created later
   * by OpenOutFile(); another process could create that name in between.
   * Consider mkstemp() where it is available. */
  mktemp (TempPath);

#ifdef DEBUG
  fprintf(stderr, "dos2unix: using %s as temp file\n", TempPath);
#endif /* DEBUG */

  /* can open in file? */
  if ((!RetVal) && ((InF=OpenInFile(ipInFN)) == NULL))
    RetVal = -1;

  /* can open out file?  On failure InF stays open here; it is closed
   * exactly once by the close step below (closing it in both places
   * would be a double fclose). */
  if (!RetVal)
  {
    if ((TempF=OpenOutFile(TempPath)) == NULL)
      RetVal = -1;
    else
      TempCreated = 1;
  }

  /* conversion successful? */
  if ((!RetVal) && (ConvertDosToUnix(InF, TempF, ipFlag)))
    RetVal = -1;

  /* can close in file? */
  if ((InF) && (fclose(InF) == EOF))
    RetVal = -1;

  /* can close out file? */
  if ((TempF) && (fclose(TempF) == EOF))
    RetVal = -1;

  if ((!RetVal) && (ipFlag->KeepDate))
  {
    UTimeBuf.actime = StatBuf.st_atime;
    UTimeBuf.modtime = StatBuf.st_mtime;
    /* can change out file time to in file time? */
    if (utime(TempPath, &UTimeBuf) == -1)
      RetVal = -1;
  }

  /* any error? remove the temp file, but only if this call created it */
  if ((RetVal) && (TempCreated))
    unlink(TempPath);

  /* can rename temp file to out file? */
  if (!RetVal)
  {
    /* ISO C leaves rename() onto an existing name implementation-defined,
     * so an existing target is removed first */
    if (stat(ipOutFN, &StatBuf) == 0)
      unlink(ipOutFN);

    if (rename(TempPath, ipOutFN) == -1)
    {
      if (!ipFlag->Quiet)
      {
        fprintf(stderr, "dos2unix: problems renaming '%s' to '%s'\n", TempPath, ipOutFN);
        fprintf(stderr, " output file remains in '%s'\n", TempPath);
      }
      /* the failure is reported to the caller in quiet mode as well */
      RetVal = -1;
    }
  }

  return RetVal;
}
/* Convert file ipInFN to UNIX format text, replacing the file itself.
 *
 * The conversion is written to a temporary file in the current directory;
 * after a successful conversion ipInFN is deleted and the temporary file
 * is renamed to ipInFN.  When ipFlag->KeepDate is set, the access and
 * modification times of the original are copied to the result.
 *
 * RetVal: 0 if success
 *         -1 otherwise
 */
int ConvertDosToUnixOldFile(char* ipInFN, CFlag *ipFlag)
{
  int RetVal = 0;
  int TempCreated = 0;   /* set once this call has created the temp file */
  FILE *InF = NULL;
  FILE *TempF = NULL;
  char TempPath[16];
  struct stat StatBuf;
  struct utimbuf UTimeBuf;

  /* retrieve ipInFN file date stamp */
  if ((ipFlag->KeepDate) && stat(ipInFN, &StatBuf))
    RetVal = -1;

  /* the template is 14 characters plus the terminator: fits TempPath */
  strcpy (TempPath, "./u2dtmp");
  strcat (TempPath, "XXXXXX");
  /* NOTE(review): mktemp() only chooses a name, the file is created later
   * by OpenOutFile(); another process could create that name in between.
   * Consider mkstemp() where it is available. */
  mktemp (TempPath);

#ifdef DEBUG
  fprintf(stderr, "dos2unix: using %s as temp file\n", TempPath);
#endif /* DEBUG */

  /* can open in file? */
  if ((!RetVal) && ((InF=OpenInFile(ipInFN)) == NULL))
    RetVal = -1;

  /* can open out file?  On failure InF stays open here; it is closed
   * exactly once by the close step below (closing it in both places
   * would be a double fclose).
   * NOTE(review): the replacement file is created by fopen(), so it does
   * not inherit the mode bits of ipInFN - confirm whether they should be
   * copied. */
  if (!RetVal)
  {
    if ((TempF=OpenOutFile(TempPath)) == NULL)
      RetVal = -1;
    else
      TempCreated = 1;
  }

  /* conversion successful? */
  if ((!RetVal) && (ConvertDosToUnix(InF, TempF, ipFlag)))
    RetVal = -1;

  /* can close in file? */
  if ((InF) && (fclose(InF) == EOF))
    RetVal = -1;

  /* can close out file? */
  if ((TempF) && (fclose(TempF) == EOF))
    RetVal = -1;

  if ((!RetVal) && (ipFlag->KeepDate))
  {
    UTimeBuf.actime = StatBuf.st_atime;
    UTimeBuf.modtime = StatBuf.st_mtime;
    /* can change out file time to in file time? */
    if (utime(TempPath, &UTimeBuf) == -1)
      RetVal = -1;
  }

  /* can delete in file?  ISO C leaves rename() onto an existing name
   * implementation-defined, so the original is removed first */
  if ((!RetVal) && (unlink(ipInFN) == -1))
    RetVal = -1;

  /* any error? remove the temp file, but only if this call created it */
  if ((RetVal) && (TempCreated))
    unlink(TempPath);

  /* can rename out file to in file? */
  if ((!RetVal) && (rename(TempPath, ipInFN) == -1))
  {
    if (!ipFlag->Quiet)
    {
      fprintf(stderr, "dos2unix: problems renaming '%s' to '%s'\n", TempPath, ipInFN);
      fprintf(stderr, " output file remains in '%s'\n", TempPath);
    }
    RetVal = -1;
  }

  return RetVal;
}
/* Filter mode: convert stdin to UNIX format text and write it to stdout.
 * The flags are first forced to new file mode, quiet, and no date
 * keeping; the remaining settings in ipFlag are used as passed in.
 * RetVal: 0 if success
 *         -1 otherwise
 */
int ConvertDosToUnixStdio(CFlag *ipFlag)
{
  int Status;

  ipFlag->KeepDate = 0;
  ipFlag->Quiet = 1;
  ipFlag->NewFile = 1;

  Status = ConvertDosToUnix(stdin, stdout, ipFlag);
  return Status;
}
/* Return non-zero when Arg equals the short or the long option spelling. */
static int MatchOption(const char *Arg, const char *ShortName, const char *LongName)
{
  return (strcmp(Arg, ShortName) == 0) || (strcmp(Arg, LongName) == 0);
}

/* Program entry point.
 *
 * Without arguments the program filters stdin to stdout.  Otherwise the
 * arguments are processed from left to right: options update the
 * conversion flags, every other argument is a file name.  In old file
 * mode (the default, or after -o) each file is converted in place; in new
 * file mode (after -n) file names are taken in infile/outfile pairs.
 * Processing stops at the first error.
 *
 * RetVal: 0 if everything succeeded, 1 otherwise (in stdin/stdout mode
 *         the result of ConvertDosToUnixStdio() is passed to exit())
 */
int main (int argc, char *argv[])
{
  /* variable declarations */
  int ArgIdx = 0;
  int CanSwitchFileMode = 1;   /* 0 while an infile waits for its outfile */
  int ShouldExit = 0;
  CFlag *pFlag;
  char *Arg;
  char *PendingInFN = NULL;    /* infile of the pair being collected */
  const char *ProgName;

  pFlag = malloc(sizeof *pFlag);
  if (pFlag == NULL)
  {
    fprintf(stderr, "dos2unix: out of memory\n");
    return 1;
  }
  pFlag->NewFile = 0;
  pFlag->Quiet = 0;
  pFlag->KeepDate = 0;
  pFlag->ConvMode = 0;
  pFlag->NewLine = 0;

  /* started as mac2unix?  Only the last path component is compared, so
   * an invocation such as /usr/bin/mac2unix is recognised too. */
  if ((argc > 0) && (argv[0] != NULL))
  {
    ProgName = strrchr(argv[0], '/');
    ProgName = (ProgName != NULL) ? ProgName + 1 : argv[0];
    if (strcmp(ProgName, "mac2unix") == 0)
      macmode = MACMODE;
  }

  /* no option, use stdin and stdout */
  if (argc == 1)
  {
    exit(ConvertDosToUnixStdio(pFlag));
  }

  while ((++ArgIdx < argc) && (!ShouldExit))
  {
    Arg = argv[ArgIdx];

    /* is it an option? */
    if (Arg[0] == '-')
    {
      /* an option */
      if (MatchOption(Arg, "-h", "--help"))
        PrintUsage();
      else if (MatchOption(Arg, "-k", "--keepdate"))
        pFlag->KeepDate = 1;
      else if (MatchOption(Arg, "-q", "--quiet"))
        pFlag->Quiet = 1;
      else if (MatchOption(Arg, "-l", "--newline"))
        pFlag->NewLine = 1;
      else if (MatchOption(Arg, "-V", "--version"))
        PrintVersion();
      else if (MatchOption(Arg, "-c", "--convmode"))
      {
        if (ArgIdx + 1 >= argc)
        {
          /* -c was the last argument: there is no mode to read */
          if (!pFlag->Quiet)
            fprintf(stderr, "dos2unix: option %s requires a conversion mode\n", Arg);
          ShouldExit = 1;
        }
        else
        {
          ArgIdx++;
          if (strcmpi(argv[ArgIdx],"ASCII") == 0)
            pFlag->ConvMode = 0;
          else if (strcmpi(argv[ArgIdx], "7Bit") == 0)
            pFlag->ConvMode = 1;
          else if (strcmpi(argv[ArgIdx], "ISO") == 0)
            pFlag->ConvMode = 2;
          else if (strcmpi(argv[ArgIdx], "Mac") == 0)
            pFlag->ConvMode = 3;
          else
          {
            if (!pFlag->Quiet)
              fprintf(stderr, "dos2unix: invalid %s conversion mode specified\n",argv[ArgIdx]);
            ShouldExit = 1;
          }
        }
      }
      else if (MatchOption(Arg, "-o", "--oldfile"))
      {
        /* last convert not paired */
        if (!CanSwitchFileMode)
        {
          if (!pFlag->Quiet)
            fprintf(stderr, "dos2unix: target of file %s not specified in new file mode\n", PendingInFN);
          ShouldExit = 1;
        }
        pFlag->NewFile = 0;
      }
      else if (MatchOption(Arg, "-n", "--newfile"))
      {
        /* last convert not paired */
        if (!CanSwitchFileMode)
        {
          if (!pFlag->Quiet)
            fprintf(stderr, "dos2unix: target of file %s not specified in new file mode\n", PendingInFN);
          ShouldExit = 1;
        }
        pFlag->NewFile = 1;
      }
      /* any other argument starting with '-' is ignored */
    }
    else
    {
      /* not an option */
      if (pFlag->NewFile)
      {
        if (CanSwitchFileMode)
        {
          /* first name of a pair: remember it and wait for the target,
           * so that options placed between the two names do not get
           * mistaken for the input file */
          PendingInFN = Arg;
          CanSwitchFileMode = 0;
        }
        else
        {
          if (!pFlag->Quiet)
            fprintf(stderr, "dos2unix: converting file %s to file %s in UNIX format ...\n", PendingInFN, Arg);
          if (ConvertDosToUnixNewFile(PendingInFN, Arg, pFlag))
          {
            if (!pFlag->Quiet)
              fprintf(stderr, "dos2unix: problems converting file %s to file %s\n", PendingInFN, Arg);
            ShouldExit = 1;
          }
          CanSwitchFileMode = 1;
        }
      }
      else
      {
        if (!pFlag->Quiet)
          fprintf(stderr, "dos2unix: converting file %s to UNIX format ...\n", Arg);
        if (ConvertDosToUnixOldFile(Arg, pFlag))
        {
          if (!pFlag->Quiet)
            fprintf(stderr, "dos2unix: problems converting file %s\n", Arg);
          ShouldExit = 1;
        }
      }
    }
  }

  /* an infile without its outfile is an error; quiet mode only
   * suppresses the message, not the failure status */
  if (!CanSwitchFileMode)
  {
    if (!pFlag->Quiet)
      fprintf(stderr, "dos2unix: target of file %s not specified in new file mode\n", PendingInFN);
    ShouldExit = 1;
  }

  free(pFlag);
  return (ShouldExit);
}