Siempre he querido una solución para este problema, así que escribí una, que es parte de JvCsvDataSet. Mis problemas fueron:
- Quiero leer un archivo que pueda tener CR, CR + LF, o solo terminaciones LF.
- Quiero algo como ReadLn, pero que es muy flexible con respecto al punto n. ° 1 y que no tiene los problemas conocidos de ReadLn. Entonces Ancient Pascal tenía el tipo de archivo de texto y el procedimiento ReadLn. Se necesita un equivalente de clase moderna.
- Me gustaría que fuera un objeto similar a una secuencia para poder leer línea por línea y no cargar todo mi archivo de megabytes de 3.7 gigabytes en la memoria. Además, quiero que la posición sea de tipo Int64, y quiero poder manejar archivos muy grandes (> 2 gb).
- Quiero que esto funcione en Delphi 7, y también en Delphi XE2, y todo lo demás.
- Quería que fuera muy, muy, muy rápido. Así que dediqué un tiempo a optimizar el rendimiento de lectura en bloque y el análisis sintáctico.
Así que aquí es lo que iba a escribir si quieres hacer esto:
procedure TForm1.Button1Click(Sender: TObject);
var
ts:TTextStream;
s:String;
begin
ts := TTextStream.Create('c:\temp\test.txt', fm_OpenReadShared);
try
while not ts.Eof do begin
s := ts.ReadLine;
doSomethingWith(s);
end;
finally
ts.Free;
end;
end;
bien.Eso parece fácil ¿verdad? Es. Y hasta tiene un indicador de modo de archivo (¿notará la opción de lectura compartida?). Ahora todo lo que necesita es Teh Codez para TTextStream, que está aquí:
unit textStreamUnit;
{$M+}
{$R-}
{
textStreamUnit
This code is based on some of the content of the JvCsvDataSet written by Warren Postma, and others,
licensed under MOZILLA Public License.
}
interface
uses
Windows,
Classes,
SysUtils;
const
cQuote = #34;
cLf = #10;
cCR = #13;
{ File stream mode flags used in TTextStream }
{ Significant 16 bits are reserved for standard file stream mode bits. }
{ Standard system values like fmOpenReadWrite are in SysUtils. }
fm_APPEND_FLAG = $20000;
fm_REWRITE_FLAG = $10000;
{ combined Friendly mode flag values }
fm_Append = fmOpenReadWrite or fm_APPEND_FLAG;
fm_OpenReadShared = fmOpenRead or fmShareDenyWrite;
fm_OpenRewrite = fmOpenReadWrite or fm_REWRITE_FLAG;
fm_Truncate = fmCreate or fm_REWRITE_FLAG;
fm_Rewrite = fmCreate or fm_REWRITE_FLAG;
TextStreamReadChunkSize = 8192; // 8k chunk reads.
resourcestring
RsECannotReadFile = 'Cannot read file %';
type
ETextStreamException = class(Exception);
{$ifndef UNICODE}
RawByteString=AnsiString;
{$endif}
TTextStream = class(TObject)
private
FStream: TFileStream; // Tried TJclFileStream also but it was too slow! Do NOT use JCL streams here. -wpostma.
FFilename: string;
FStreamBuffer: PAnsiChar;
FStreamIndex: Integer;
FStreamSize: Integer;
FLastReadFlag: Boolean;
procedure _StreamReadBufInit;
public
function ReadLine: RawByteString; { read a string, one per line, wow. Text files. Cool eh?}
procedure Append;
procedure Rewrite;
procedure Write(const s: RawByteString); {write a string. wow, eh? }
procedure WriteLine(const s: RawByteString); {write string followed by Cr+Lf }
procedure WriteChar(c: AnsiChar);
procedure WriteCrLf;
//procedure Write(const s: string);
function Eof: Boolean; {is at end of file? }
{ MODE is typically a fm_xxx constant thatimplies a default set of stream mode bits plus some extended bit flags that are specific to this stream type.}
constructor Create(const FileName: string; Mode: DWORD = fm_OpenReadShared; Rights: Cardinal = 0); reintroduce; virtual;
destructor Destroy; override;
function Size: Int64; //override; // sanity
{ read-only properties at runtime}
property Filename: string read FFilename;
property Stream: TFileStream read FStream; { Get at the underlying stream object}
end;
implementation
// 2 gigabyte file limit workaround:
function GetFileSizeEx(h: HFILE; FileSize: PULargeInteger): BOOL; stdcall; external Kernel32;
procedure TTextStream.Append;
begin
Stream.Seek(0, soFromEnd);
end;
constructor TTextStream.Create(const FileName: string; Mode: DWORD; Rights: Cardinal);
var
IsAppend: Boolean;
IsRewrite: Boolean;
begin
inherited Create;
FFilename := FileName;
FLastReadFlag := False;
IsAppend := (Mode and fm_APPEND_FLAG) <> 0;
IsRewrite := (Mode and fm_REWRITE_FLAG) <> 0;
FStream := TFileStream.Create(Filename, {16 lower bits only}Word(Mode), Rights);
//Stream := FStream; { this makes everything in the base class actually work if we inherited from Easy Stream}
if IsAppend then
Self.Append // seek to the end.
else
Stream.Position := 0;
if IsRewrite then
Rewrite;
_StreamReadBufInit;
end;
destructor TTextStream.Destroy;
begin
if Assigned(FStream) then
FStream.Position := 0; // avoid nukage
FreeAndNil(FStream);
FreeMem(FStreamBuffer); // Buffered reads for speed.
inherited Destroy;
end;
function TTextStream.Eof: Boolean;
begin
if not Assigned(FStream) then
Result := False
//Result := True
else
Result := FLastReadFlag and (FStreamIndex >= FStreamSize);
//Result := FStream.Position >= FStream.Size;
end;
{ TTextStream.ReadLine:
This reads a line of text, normally terminated by carriage return and/or linefeed
but it is a bit special, and adapted for CSV usage because CR/LF characters
inside quotes are read as a single line.
This is a VERY PERFORMANCE CRITICAL function. We loop tightly inside here.
So there should be as few procedure-calls inside the repeat loop as possible.
}
function TTextStream.ReadLine: RawByteString;
var
Buf: array of AnsiChar;
n: Integer;
QuoteFlag: Boolean;
LStreamBuffer: PAnsiChar;
LStreamSize: Integer;
LStreamIndex: Integer;
procedure FillStreamBuffer;
begin
FStreamSize := Stream.Read(LStreamBuffer[0], TextStreamReadChunkSize);
LStreamSize := FStreamSize;
if LStreamSize = 0 then
begin
if FStream.Position >= FStream.Size then
FLastReadFlag := True
else
raise ETextStreamException.CreateResFmt(@RsECannotReadFile, [FFilename]);
end
else
if LStreamSize < TextStreamReadChunkSize then
FLastReadFlag := True;
FStreamIndex := 0;
LStreamIndex := 0;
end;
begin
{ Ignore linefeeds, read until carriage return, strip carriage return, and return it }
SetLength(Buf, 150);
n := 0;
QuoteFlag := False;
LStreamBuffer := FStreamBuffer;
LStreamSize := FStreamSize;
LStreamIndex := FStreamIndex;
while True do
begin
if n >= Length(Buf) then
SetLength(Buf, n + 100);
if LStreamIndex >= LStreamSize then
FillStreamBuffer;
if LStreamIndex >= LStreamSize then
Break;
Buf[n] := LStreamBuffer[LStreamIndex];
Inc(LStreamIndex);
case Buf[n] of
cQuote: {34} // quote
QuoteFlag := not QuoteFlag;
cLf: {10} // linefeed
if not QuoteFlag then
Break;
cCR: {13} // carriage return
begin
if not QuoteFlag then
begin
{ If it is a CRLF we must skip the LF. Otherwise the next call to ReadLine
would return an empty line. }
if LStreamIndex >= LStreamSize then
FillStreamBuffer;
if LStreamBuffer[LStreamIndex] = cLf then
Inc(LStreamIndex);
Break;
end;
end
end;
Inc(n);
end;
FStreamIndex := LStreamIndex;
SetString(Result, PAnsiChar(@Buf[0]), n);
end;
procedure TTextStream.Rewrite;
begin
if Assigned(FStream) then
FStream.Size := 0;// truncate!
end;
function TTextStream.Size: Int64; { Get file size }
begin
if Assigned(FStream) then
GetFileSizeEx(FStream.Handle, PULargeInteger(@Result)) {int64 Result}
else
Result := 0;
end;
{ Look at this. A stream that can handle a string parameter. What will they think of next? }
procedure TTextStream.Write(const s: RawByteString);
begin
Stream.Write(s[1], Length(s)); {The author of TStreams would like you not to be able to just write Stream.Write(s). Weird. }
end;
procedure TTextStream.WriteChar(c: AnsiChar);
begin
Stream.Write(c, SizeOf(AnsiChar));
end;
procedure TTextStream.WriteCrLf;
begin
WriteChar(#13);
WriteChar(#10);
end;
procedure TTextStream.WriteLine(const s: RawByteString);
begin
Write(s);
WriteCrLf;
end;
procedure TTextStream._StreamReadBufInit;
begin
if not Assigned(FStreamBuffer) then
begin
//FStreamBuffer := AllocMem(TextStreamReadChunkSize);
GetMem(FStreamBuffer, TextStreamReadChunkSize);
end;
end;
end.
+1 muy bien de hecho –
Recibí el error de compilación "Tipos incompatibles" en SetString (Resultado, M.Memory, M.Size); – ewlung
Cambié a SetString (Resultado, PChar (M.Memory), Integer (M.Size)); y se compiló Pero, si revisé el textList.count, el resultado es 1. Entonces, la conversión no es correcta. Los archivos de texto contienen múltiples líneas, cada una terminada con CR. Tu función convierte las líneas enteras en una cadena, lo cual es incorrecto. – ewlung