Как читать данные из NSFileHandle построчно?

У меня есть текстовый файл с данными следующего вида:

например

PUFGUjVRallYZDNaazFtVjVObU1zWm5ZcUJUYU5ORk4zbGthNHNDVUdSMlFVQmpSVEoxUUNSallYaFhkanBITXBGR1NTQnpZRTltZE1OalVzSkdXQ0Z6WXR0V2RpTmpTdXgwTWs5V1lZSkZiWjFXT29OV2JSVlhaSTUwYUpwR040UUZXTzVHVXFoWFVRcFdWNHdVTUJ0Q1VHSmxXVlJVTlJCMVE1VTFWV
PUFGUjVRallYZDNaazFtVjVObU1zWm5ZcUJUYU5ORk4zbGthNHNDVUdSMlFVQmpSVEoxUUNSallYaFhkanBITXBGR1NTQnpZRTltZE1OalVzSkdXQ0Z6WXR0V2RpTmpTdXgwTWs5V1lZSkZiWjFXT29OV2JSVlhaSTUwYUpwR040UUZXTzVHVXFoWFVRcFdWNHdVTUJ0Q1VHSmxXVlJVTlJCMVE1VTFWV

Теперь я хочу читать данные построчно. Это означает, что сначала я хочу прочитать

PUFGUjVRallYZDNaazFtVjVObU1zWm5ZcUJUYU5ORk4zbGthNHNDVUdSMlFVQmpSVEoxUUNSallYaFhkanBITXBGR1NTQnpZRTltZE1OalVzSkdXQ0Z6WXR0V2RpTmpTdXgwTWs5V1lZSkZiWjFXT29OV2JSVlhaSTUwYUpwR040UUZXTzVHVXFoWFVRcFdWNHdVTUJ0Q1VHSmxXVlJVTlJCMVE1VTFWV

а затем следующую строку и так далее. Есть у кого-нибудь идеи?

10 ответов

Решение

Если у вас небольшой файл, то метод @mipadi, вероятно, вполне подойдёт. Однако если файл большой (скажем, больше 1 МБ), стоит подумать о том, чтобы читать его построчно. Когда-то я написал для этого класс, который привожу здесь:

//DDFileReader.h

/**
 Line-oriented reader for a file on disk.

 The file is read through an NSFileHandle in pieces of `chunkSize` bytes, and
 each call to -readLine returns the next line, so the whole file does not have
 to be loaded at once.
 */
@interface DDFileReader : NSObject {
    // Path passed to -initWithFilePath: (stored by the manual retain/release implementation).
    NSString * filePath;

    // Handle used for all reads.
    NSFileHandle * fileHandle;
    // Byte offset of the first byte that has not been returned yet.
    unsigned long long currentOffset;
    // File size in bytes, measured once in -initWithFilePath:.
    unsigned long long totalFileLength;

    // Backing storage for the two properties below.
    NSString * lineDelimiter;
    NSUInteger chunkSize;
}

// String that terminates a line; the initializer sets it to @"\n".
@property (nonatomic, copy) NSString * lineDelimiter;
// Number of bytes requested per read; the initializer sets it to 10.
@property (nonatomic) NSUInteger chunkSize;

// Opens aPath for reading. Returns nil when no file handle can be created for the path.
- (id) initWithFilePath:(NSString *)aPath;

// Returns the next line, including its trailing delimiter when one was found,
// or nil once the end of the file has been reached. Bytes are decoded as UTF-8.
- (NSString *) readLine;
// Same as -readLine, with leading/trailing whitespace and newlines removed.
- (NSString *) readTrimmedLine;

#if NS_BLOCKS_AVAILABLE
// Calls block with each remaining line in turn; the block can set its BOOL
// argument to YES to stop the enumeration early.
- (void) enumerateLinesUsingBlock:(void(^)(NSString*, BOOL *))block;
#endif

@end


//DDFileReader.m

#import "DDFileReader.h"

@interface NSData (DDAdditions)

/**
 Finds the first occurrence of a byte sequence in the receiver.

 @param dataToFind The bytes to look for.
 @return The range of the first match. When there is no match (including when
         dataToFind is nil, empty, or longer than the receiver) the location is
         NSNotFound and the length is the length of dataToFind.
 */
- (NSRange) rangeOfData_dd:(NSData *)dataToFind;

@end

@implementation NSData (DDAdditions)

- (NSRange) rangeOfData_dd:(NSData *)dataToFind {
    NSUInteger length = [self length];
    NSUInteger searchLength = [dataToFind length];

    // Callers test only .location, but keep the historical "not found" shape
    // (NSNotFound paired with the needle's length) for compatibility.
    NSRange notFound = {NSNotFound, searchLength};

    // An empty or over-long needle can never match; bailing out here also
    // keeps nil away from the Foundation search below.
    if (searchLength == 0 || searchLength > length) { return notFound; }

    // Foundation's search (OS X 10.6+ / iOS 4+) restarts correctly after a
    // failed partial match and never reports a truncated match at the end of
    // the receiver, both of which the previous hand-rolled loop got wrong.
    NSRange found = [self rangeOfData:dataToFind options:0 range:NSMakeRange(0, length)];
    if (found.location == NSNotFound) { return notFound; }
    return found;
}

@end

// Manual retain/release implementation of DDFileReader.
@implementation DDFileReader
@synthesize lineDelimiter, chunkSize;

/**
 Opens the file at aPath and measures its length.

 @param aPath Path of the file to read.
 @return The reader, or nil when no file handle could be created for aPath.
 */
- (id) initWithFilePath:(NSString *)aPath {
    if (self = [super init]) {
        fileHandle = [[NSFileHandle fileHandleForReadingAtPath:aPath] retain];
        if (fileHandle == nil) {
            [self release]; return nil;
        }

        lineDelimiter = [[NSString alloc] initWithString:@"\n"];
        // Copy so a mutable string passed by the caller cannot change under us.
        filePath = [aPath copy];
        currentOffset = 0ULL;
        chunkSize = 10;
        [fileHandle seekToEndOfFile];
        totalFileLength = [fileHandle offsetInFile];
        //we don't need to seek back, since readLine will do that.
    }
    return self;
}

- (void) dealloc {
    [fileHandle closeFile];
    [fileHandle release], fileHandle = nil;
    [filePath release], filePath = nil;
    [lineDelimiter release], lineDelimiter = nil;
    currentOffset = 0ULL;
    [super dealloc];
}

/**
 Reads the next line of the file.

 @return The next line decoded as UTF-8, including its trailing delimiter when
         one was found, or nil when there is nothing left to read. nil is also
         returned when the bytes of the line are not valid UTF-8.
 */
- (NSString *) readLine {
    if (currentOffset >= totalFileLength) { return nil; }

    NSData * newLineData = [lineDelimiter dataUsingEncoding:NSUTF8StringEncoding];
    NSUInteger delimiterLength = [newLineData length];
    // A chunk size of zero would never make progress; read at least one byte.
    NSUInteger readLength = (chunkSize > 0) ? chunkSize : 1;

    [fileHandle seekToFileOffset:currentOffset];
    NSMutableData * currentData = [[NSMutableData alloc] init];
    BOOL shouldReadMore = YES;

    NSAutoreleasePool * readPool = [[NSAutoreleasePool alloc] init];
    while (shouldReadMore) {
        if (currentOffset + [currentData length] >= totalFileLength) { break; }
        NSData * chunk = [fileHandle readDataOfLength:readLength];
        // The file may have shrunk since it was measured; an empty read means
        // there is nothing more to get, so stop instead of spinning forever.
        if ([chunk length] == 0) { break; }

        NSUInteger lengthBeforeChunk = [currentData length];
        [currentData appendData:chunk];

        // Without a delimiter the rest of the file is a single line.
        if (delimiterLength == 0) { continue; }

        // Search the new chunk plus the last (delimiterLength - 1) bytes read
        // before it, so a delimiter split across two chunks is still found.
        NSUInteger overlap = delimiterLength - 1;
        NSUInteger searchStart = (lengthBeforeChunk > overlap) ? lengthBeforeChunk - overlap : 0;
        NSData * window = [currentData subdataWithRange:NSMakeRange(searchStart, [currentData length] - searchStart)];
        NSRange newLineRange = [window rangeOfData_dd:newLineData];

        // Accept only a complete delimiter: a match that would run past the
        // end of the window is a partial one and needs more data.
        if (newLineRange.location != NSNotFound &&
            newLineRange.location + delimiterLength <= [window length]) {
            //keep the delimiter in the string, drop everything after it
            [currentData setLength:searchStart + newLineRange.location + delimiterLength];
            shouldReadMore = NO;
        }
    }
    [readPool release];

    currentOffset += [currentData length];

    NSString * line = nil;
    if ([currentData length] > 0) {
        line = [[[NSString alloc] initWithData:currentData encoding:NSUTF8StringEncoding] autorelease];
    }
    [currentData release];
    return line;
}

/**
 Reads the next line and strips leading/trailing whitespace and newlines.

 @return The trimmed line, or nil when -readLine returns nil.
 */
- (NSString *) readTrimmedLine {
    return [[self readLine] stringByTrimmingCharactersInSet:[NSCharacterSet whitespaceAndNewlineCharacterSet]];
}

#if NS_BLOCKS_AVAILABLE
/**
 Calls block once per remaining line.

 @param block Receives each line and a pointer to a BOOL; setting the BOOL to
              YES stops the enumeration after the current line. A nil block
              makes this method a no-op.
 */
- (void) enumerateLinesUsingBlock:(void(^)(NSString*, BOOL*))block {
  if (block == nil) { return; }

  BOOL stop = NO;
  while (stop == NO) {
    // Drain per line so a large file does not pile every line up in the
    // caller's autorelease pool.
    NSAutoreleasePool * linePool = [[NSAutoreleasePool alloc] init];
    NSString * line = [self readLine];
    if (line == nil) {
      stop = YES;
    } else {
      block(line, &stop);
    }
    [linePool release];
  }
}
#endif

@end

Использовать этот класс можно так:

// Print every line of the file by pulling lines until readLine returns nil.
DDFileReader * reader = [[DDFileReader alloc] initWithFilePath:pathToMyFile];
for (NSString * line = [reader readLine]; line != nil; line = [reader readLine]) {
  NSLog(@"read line: %@", line);
}
[reader release];

Или (для 10.6+ и iOS 4+):

// Print every line of the file through the block-based enumeration API.
DDFileReader * reader = [[DDFileReader alloc] initWithFilePath:pathToMyFile];
void (^logLine)(NSString *, BOOL *) = ^(NSString * line, BOOL * stop) {
  NSLog(@"read line: %@", line);
};
[reader enumerateLinesUsingBlock:logLine];
[reader release];

Счастливого Рождества.:)

Я переписал это, чтобы быть ARC-совместимым:

//
//  DDFileReader.m
//  PBX2OPML
//
//  Created by michael isbell on 11/6/11.
//  Copyright (c) 2011 BlueSwitch. All rights reserved.
//

//DDFileReader.m

#import "DDFileReader.h"

@interface NSData (DDAdditions)

/**
 Finds the first occurrence of a byte sequence in the receiver.

 @param dataToFind The bytes to look for.
 @return The range of the first match. When there is no match (including when
         dataToFind is nil, empty, or longer than the receiver) the location is
         NSNotFound and the length is the length of dataToFind.
 */
- (NSRange) rangeOfData_dd:(NSData *)dataToFind;

@end

@implementation NSData (DDAdditions)

- (NSRange) rangeOfData_dd:(NSData *)dataToFind {
    NSUInteger haystackLength = [self length];
    NSUInteger needleLength = [dataToFind length];

    // Preserve the shape callers have always received for "not found":
    // NSNotFound together with the length of the needle.
    NSRange notFound = {NSNotFound, needleLength};

    // Nothing can match an empty needle or one longer than the receiver, and
    // the Foundation search below must not be handed nil.
    if (needleLength == 0 || needleLength > haystackLength) { return notFound; }

    // The previous byte-by-byte loop did not retry the current byte after a
    // failed partial match and could report a truncated match at the end of
    // the data; the Foundation search has neither problem.
    NSRange match = [self rangeOfData:dataToFind options:0 range:NSMakeRange(0, haystackLength)];
    return (match.location == NSNotFound) ? notFound : match;
}

@end

// ARC implementation of DDFileReader.
@implementation DDFileReader
@synthesize lineDelimiter, chunkSize;

/**
 Opens the file at aPath and measures its length.

 @param aPath Path of the file to read.
 @return The reader, or nil when no file handle could be created for aPath.
 */
- (id) initWithFilePath:(NSString *)aPath {
    if (self = [super init]) {
        fileHandle = [NSFileHandle fileHandleForReadingAtPath:aPath];
        if (fileHandle == nil) {
            return nil;
        }

        lineDelimiter = @"\n";
        // Remember the path as well; the ivar was previously left unset here.
        filePath = [aPath copy];
        currentOffset = 0ULL;
        chunkSize = 10;
        [fileHandle seekToEndOfFile];
        totalFileLength = [fileHandle offsetInFile];
        //we don't need to seek back, since readLine will do that.
    }
    return self;
}

- (void) dealloc {
    // Under ARC only the non-memory cleanup is left to do.
    [fileHandle closeFile];
}

/**
 Reads the next line of the file.

 @return The next line decoded as UTF-8, including its trailing delimiter when
         one was found, or nil when there is nothing left to read. nil is also
         returned when the bytes of the line are not valid UTF-8.
 */
- (NSString *) readLine {
    if (currentOffset >= totalFileLength) { return nil; }

    NSData * newLineData = [lineDelimiter dataUsingEncoding:NSUTF8StringEncoding];
    NSUInteger delimiterLength = [newLineData length];
    // A chunk size of zero would never make progress; read at least one byte.
    NSUInteger readLength = (chunkSize > 0) ? chunkSize : 1;

    [fileHandle seekToFileOffset:currentOffset];
    NSMutableData * currentData = [[NSMutableData alloc] init];
    BOOL shouldReadMore = YES;

    while (shouldReadMore) {
        // Drain temporaries after every chunk so a very long line does not
        // keep all of its chunk objects alive until the line is complete.
        @autoreleasepool {
            if (currentOffset + [currentData length] >= totalFileLength) { break; }
            NSData * chunk = [fileHandle readDataOfLength:readLength];
            // The file may have shrunk since it was measured; an empty read
            // means there is nothing more to get, so stop instead of spinning.
            if ([chunk length] == 0) { break; }

            NSUInteger lengthBeforeChunk = [currentData length];
            [currentData appendData:chunk];

            // Without a delimiter the rest of the file is a single line.
            if (delimiterLength == 0) { continue; }

            // Search the new chunk plus the last (delimiterLength - 1) bytes
            // before it, so a delimiter split across two chunks is found.
            NSUInteger overlap = delimiterLength - 1;
            NSUInteger searchStart = (lengthBeforeChunk > overlap) ? lengthBeforeChunk - overlap : 0;
            NSData * window = [currentData subdataWithRange:NSMakeRange(searchStart, [currentData length] - searchStart)];
            NSRange newLineRange = [window rangeOfData_dd:newLineData];

            // Accept only a complete delimiter: a match that would run past
            // the end of the window is a partial one and needs more data.
            if (newLineRange.location != NSNotFound &&
                newLineRange.location + delimiterLength <= [window length]) {
                //keep the delimiter in the string, drop everything after it
                [currentData setLength:searchStart + newLineRange.location + delimiterLength];
                shouldReadMore = NO;
            }
        }
    }

    currentOffset += [currentData length];

    // Nothing could be read: report end of input instead of an empty line
    // that callers would keep requesting forever.
    if ([currentData length] == 0) { return nil; }
    return [[NSString alloc] initWithData:currentData encoding:NSUTF8StringEncoding];
}

/**
 Reads the next line and strips leading/trailing whitespace and newlines.

 @return The trimmed line, or nil when -readLine returns nil.
 */
- (NSString *) readTrimmedLine {
    return [[self readLine] stringByTrimmingCharactersInSet:[NSCharacterSet whitespaceAndNewlineCharacterSet]];
}

#if NS_BLOCKS_AVAILABLE
/**
 Calls block once per remaining line.

 @param block Receives each line and a pointer to a BOOL; setting the BOOL to
              YES stops the enumeration after the current line. A nil block
              makes this method a no-op.
 */
- (void) enumerateLinesUsingBlock:(void(^)(NSString*, BOOL*))block {
    if (block == nil) { return; }

    BOOL stop = NO;
    while (stop == NO) {
        // One pool per line keeps memory flat while walking a large file.
        @autoreleasepool {
            NSString * line = [self readLine];
            if (line == nil) { break; }
            block(line, &stop);
        }
    }
}
#endif

@end

Я начал проект GitHub на основе исходного кода Dave DeLong. Вы можете улучшить код. К настоящему времени я могу читать файлы вперед и назад.

// Load the whole file into memory, then walk the pieces between "\n" characters.
NSString *contents = [NSString stringWithContentsOfFile:filePath encoding:fileEncoding error:NULL];
NSArray *lines = [contents componentsSeparatedByString:@"\n"];
for (NSString *line in lines) {
    // Do something with the line
}

В Какао нет API или встроенных языковых конструкций для построчного чтения файла.

Для больших текстовых файлов специальный класс не нужен. Objective-C является надмножеством C, поэтому для этой задачи можно использовать обычные функции C.

// Read a text file line by line with the BSD fgetln() function.
FILE* file = fopen("path to my file", "r");
if (file != NULL) {
    size_t length = 0;
    char *cLine = NULL;

    // fgetln() returns NULL at end of file or on error, so test the pointer
    // rather than relying on the length having been reset.
    while ((cLine = fgetln(file, &length)) != NULL) {
        // The returned bytes are not NUL-terminated and include the trailing
        // newline when the line has one. Building the string straight from
        // (pointer, length) avoids a stack copy and decodes as UTF-8 instead
        // of the system encoding that "%s" would use.
        // Under manual retain/release this string must be released by you.
        NSString *line = [[NSString alloc] initWithBytes:cLine
                                                  length:length
                                                encoding:NSUTF8StringEncoding];
        // Do what you want here. (line is nil when the bytes are not valid UTF-8.)
    }

    fclose(file);
}

Обратите внимание: fgetln возвращает строку вместе с завершающим символом новой строки (если он есть), и эта строка не завершается нулём. Поэтому мы берём буфер длиной length + 1 — чтобы оставить место для завершающего нулевого символа.

Вот метод, который я использовал для чтения одной строки из NSInputStream. Обратите внимание, что он оптимизирован для читаемости кода, а не для скорости. ;-)

/**
 Reads one line from an already opened input stream, one byte at a time.

 @param inputStream The stream to read from.
 @return The bytes up to and including the next '\n', decoded as UTF-8. When
         the stream ends (or fails) first, whatever was read so far is
         returned; nil when nothing at all could be read.
 */
- (NSString*) readLine: (NSInputStream*) inputStream {
    NSMutableData* data = [NSMutableData data];
    uint8_t oneByte = 0;
    while (oneByte != '\n') {
        NSInteger actuallyRead = [inputStream read: &oneByte maxLength: 1];
        // 0 means end of stream and a negative value means an error; either
        // way no further byte will arrive, so stop instead of looping forever.
        if (actuallyRead != 1) {
            break;
        }
        [data appendBytes: &oneByte length: 1];
    }

    if ([data length] == 0) {
        return nil;
    }
    return [[NSString alloc] initWithData: data encoding: NSUTF8StringEncoding];
}

Я обнаружил, что в GitX тоже используется построчный ридер.
Посмотрите репозиторий brotherbard на GitHub или сайт Михаэля Штапельберга.

@Джо Ян
Отлично! В ближайшие дни посмотрю внимательнее.
Буду рад, если вы сделаете форк моего репозитория на GitHub и пришлёте мне пул-реквест.

Я столкнулся с похожей задачей, но в несколько других условиях; вот моё решение на Swift 3. Предполагается, что текстовый файл закодирован в UTF-8.

extension FileHandle {

    /// Reads forward from the handle's current offset and calls `block` once
    /// per line, without the line terminator.
    ///
    /// A line ends at a CR (0x0d), LF (0x0a) or NUL (0x00) byte; a CR directly
    /// followed by an LF inside the buffer is consumed as a single terminator.
    /// Each line is decoded as UTF-8.
    ///
    /// - Parameter block: Receives the line and a pointer to a stop flag. When
    ///   the flag is set, enumeration ends and the handle is positioned at
    ///   the first byte after the last delivered line's terminator.
    ///
    /// NOTE(review): `block` is only invoked when a terminator is seen, so a
    /// final line that has no terminator does not appear to be delivered.
    /// Confirm whether that is intended.
    /// NOTE(review): the closing brace of this extension is not present in
    /// this excerpt.
    func enumerateLines(_ block: @escaping (String, UnsafeMutablePointer<Bool>) -> Void) {

        // find the end of file
        // (remember where we are, jump to the end to learn its offset, then jump back)
        var offset = self.offsetInFile
        let eof = self.seekToEndOfFile()
        self.seek(toFileOffset: offset)
        let blockSize = 1024
        // Bytes read from the file that have not been turned into lines yet;
        // `offset` is the file position of the buffer's first byte.
        var buffer = Data()

        // process to the end of file
        while offset + UInt64(buffer.count) < eof {
            var found = false

            // make sure buffer contains at least one CR, LF or null
            // (only the newly read block is scanned; `block` here shadows the
            // callback parameter inside this inner loop)
            while !found && offset + UInt64(buffer.count) < eof {
                let block = self.readData(ofLength: blockSize)
                buffer.append(block)
                for byte in block {
                    if [0x0d, 0x0a, 0x00].contains(byte) {
                        found = true ; break
                    }
                }
            }

            // retrieve lines within the buffer
            var index = 0
            var head = 0 // head of line
            var done = false
            buffer.enumerateBytes({ (pointer, count, stop) in
                while index < count {
                    // find a line terminator
                    if [0x0d, 0x0a, 0x00].contains(pointer[index]) {
                        let lineData = Data(pointer[head ..< index])
                        if let line = String(bytes: lineData, encoding: .utf8) {
                            block(line, &stop) // stop requested
                            // step over the terminator (two bytes for CR LF) and
                            // start the next line right after it
                            if pointer[index] == 0x0d && index+1 < count && pointer[index+1] == 0x0a {
                                index += 2 ; head = index
                            }
                            else { index += 1 ; head = index }
                            if stop { done = true ; return } // end of enumerateLines
                        }
                        // the bytes of this line could not be decoded as UTF-8:
                        // leave the closure without advancing `head`
                        else { return } // end of enumerateLines
                    }
                    else { index += 1 }
                }
            })

            // Everything before `head` has been delivered: advance the file
            // position bookkeeping and drop those bytes from the buffer.
            offset += UInt64(head)
            buffer.replaceSubrange(0 ..< head, with: Data())
            if done { // stop requested
                // leave the handle at the start of the first undelivered line
                self.seek(toFileOffset: offset)
                return
            }
        }
    }

Вот использование:

    // Open the bundled text file; both steps crash on failure, as is usual in a demo.
    let hugeFileURL = Bundle.main.url(forResource: "huge_file", withExtension: "txt")!
    let fileHandle = try! FileHandle(forReadingFrom: hugeFileURL)

    // Print each line, asking the enumeration to stop once someCondition holds.
    fileHandle.enumerateLines({ (text: String, shouldStop: UnsafeMutablePointer<Bool>) -> Void in
        if someCondition {
            shouldStop.pointee = true
        }
        print(text)
    })
    /* let remaining = fileHandle.readDataToEndOfFile() */

https://gist.github.com/codelynx/c1de603a85e7503fe9597d027e93f4de

Вы также можете посмотреть библиотеку CGIStream, которую я написал для своего проекта HTTP-сервера: https://github.com/xcvista/ohttpd2/tree/master/CGIStream. Вместо файловых дескрипторов этот код работает с NSInputStream. По сути, это написанный на Objective-C клон классов System.IO.StreamReader и System.IO.StreamWriter из Microsoft .NET Framework.

Он работает не только с файлами, но и с сетевыми сокетами. Я использую его для обработки протокола HTTP — отсюда и префикс CGI в названии.

Я переделал FileReader в категорию для NSFileHandle; надеюсь, это поможет другим.

/**
 Line-oriented reading for NSFileHandle, in both directions, relative to the
 handle's current file offset.
 */
@interface NSFileHandle (Readline)
// Returns the text from the current offset up to and including the next "\n"
// and leaves the offset just after that newline. Returns an empty string when
// no bytes remain.
- (NSString*)readLine;
// Returns the text between the nearest preceding "\n" (or the start of the
// file) and the current offset; a "\n" that was found is the first character
// of the result. Leaves the offset at the first byte of the returned text.
- (NSString*)readLineBackwards;
@end

#import "NSFileHandle+Readline.h"
#import "NSDataExtensions.h"

// Manual retain/release implementation; every object created here is autoreleased.
@implementation NSFileHandle (Readline)

/**
 Reads forward from the current file offset to the end of the line.

 @return The bytes up to and including the next "\n", decoded as UTF-8, with
         the file offset left just after that newline. An empty string when no
         bytes remain; nil when the bytes are not valid UTF-8.
 */
- (NSString*)readLine {

    NSData* newLineData = [@"\n" dataUsingEncoding:NSUTF8StringEncoding];
    const NSUInteger chunkSize = 10;

    // Autoreleased, so nothing is leaked on any exit path.
    NSMutableData* currentData = [NSMutableData data];

    while (YES) {
        NSData* chunk = [self readDataOfLength:chunkSize];

        // End of file.
        if ([chunk length] == 0) {
            break;
        }

        // Find the location and length of the next line delimiter.
        NSRange newLineRange = [chunk rangeOfData:newLineData];
        if (newLineRange.location != NSNotFound) {
            // Keep the delimiter in the string and move the file offset back
            // to the first byte after it, since the chunk read past it.
            NSUInteger consumed = newLineRange.location + [newLineData length];
            [self seekToFileOffset:[self offsetInFile] - [chunk length] + consumed];
            [currentData appendData:[chunk subdataWithRange:NSMakeRange(0, consumed)]];
            break;
        }
        [currentData appendData:chunk];
    }

    // Decode with the same encoding the delimiter was built with. ASCII text
    // decodes exactly as before; non-ASCII UTF-8 text is no longer rejected.
    return [[[NSString alloc] initWithData:currentData encoding:NSUTF8StringEncoding] autorelease];
}

/**
 Reads backwards from the current file offset to the start of the line.

 @return The bytes between the nearest preceding "\n" (or the start of the
         file) and the current offset, decoded as UTF-8. A newline that was
         found is the first character of the result. The file offset is left
         at the first byte of the returned text. An empty string at offset 0;
         nil when the bytes are not valid UTF-8.
 */
- (NSString*)readLineBackwards {

    NSData* newLineData = [@"\n" dataUsingEncoding:NSUTF8StringEncoding];
    const NSUInteger chunkSize = 10;

    NSMutableData* currentData = [NSMutableData data];
    BOOL shouldReadMore = YES;

    while (shouldReadMore) {

        unsigned long long position = [self offsetInFile];
        unsigned long long chunkStart = 0;
        NSUInteger currentChunkSize = chunkSize;

        if (position <= chunkSize) {
            // Less than a full chunk lies before us: read from the start of
            // the file and make this the last pass.
            currentChunkSize = (NSUInteger)position;
            shouldReadMore = NO;
        } else {
            chunkStart = position - chunkSize;
        }

        [self seekToFileOffset:chunkStart];
        NSData* chunk = [self readDataOfLength:currentChunkSize];

        NSRange newLineRange = [chunk rangeOfDataBackwardsSearch:newLineData];

        if (newLineRange.location == NSNotFound) {
            // No newline in this chunk: rewind to its start so the next pass
            // (or the next call) continues with the bytes before it.
            [self seekToFileOffset:chunkStart];
        } else {
            // Keep the newline and everything after it, and park the offset
            // on the newline so the next call reads the preceding line.
            NSUInteger subDataLoc = newLineRange.location;
            chunk = [chunk subdataWithRange:NSMakeRange(subDataLoc, [chunk length] - subDataLoc)];
            shouldReadMore = NO;
            [self seekToFileOffset:chunkStart + subDataLoc];
        }
        [currentData prepend:chunk];
    }

    return [[[NSString alloc] initWithData:currentData encoding:NSUTF8StringEncoding] autorelease];
}

@end





//
//  NSDataExtensions.m
//  LineReader
//
//  Created by Tobias Preuss on 08.10.10.
//  Copyright 2010 Tobias Preuss. All rights reserved.
//

#import "NSDataExtensions.h"



// -----------------------------------------------------------------------------
// NSData additions.
// -----------------------------------------------------------------------------


/**
 Extension of the NSData class.
 Byte sequences can be searched for forwards or backwards. Further the
 extension supplies a function to convert the contents to a string for
 debugging purposes.
 */
@implementation NSData (Additions)

/**
 Finds the first occurrence of a byte sequence in the receiver.
 @param dataToFind The bytes to look for (typically an encoded line delimiter).
 @returns The range of the first match. When nothing matches (including a nil,
          empty or over-long dataToFind) the location is NSNotFound and the
          length is the length of dataToFind.
 */
- (NSRange)rangeOfData:(NSData*)dataToFind {

    NSUInteger length = [self length];
    NSUInteger searchLength = [dataToFind length];

    // Historical "not found" shape: NSNotFound with the needle's length.
    NSRange notFound = {NSNotFound, searchLength};
    if (searchLength == 0 || searchLength > length) {
        return notFound;
    }

    // Foundation's search handles overlapping partial matches and never
    // reports a truncated match at the end of the data.
    NSRange found = [self rangeOfData:dataToFind options:0 range:NSMakeRange(0, length)];
    return (found.location == NSNotFound) ? notFound : found;
}


/**
 Finds the last occurrence of a byte sequence in the receiver.
 @param dataToFind The bytes to look for (typically an encoded line delimiter).
 @returns The range of the last match. When nothing matches (including a nil,
          empty or over-long dataToFind) the location is NSNotFound and the
          length is the length of dataToFind.
 */
- (NSRange)rangeOfDataBackwardsSearch:(NSData*)dataToFind {

    NSUInteger length = [self length];
    NSUInteger searchLength = [dataToFind length];

    NSRange notFound = {NSNotFound, searchLength};
    if (searchLength == 0 || searchLength > length) {
        return notFound;
    }

    // The previous loop counted an unsigned index down with "index >= 0" and
    // "index - 2", which could wrap around and read far outside the buffer.
    NSRange found = [self rangeOfData:dataToFind
                              options:NSDataSearchBackwards
                                range:NSMakeRange(0, length)];
    return (found.location == NSNotFound) ? notFound : found;
}

/**
 Decodes the receiver as text.
 @param encoding The encoding of the bytes.
 @returns An autoreleased string, or nil if the bytes cannot be decoded with
          the given encoding.
 */
- (NSString*)stringValueWithEncoding:(NSStringEncoding)encoding {
    // The method name does not imply ownership, so hand back an autoreleased object.
    return [[[NSString alloc] initWithData:self encoding:encoding] autorelease];
}

@end




// -----------------------------------------------------------------------------
// NSMutableData additions.
// -----------------------------------------------------------------------------


/**
 Extension of the NSMutableData class.
 Data can be prepended in addition to the append function of the framework.
 */
@implementation NSMutableData (Additions)

/**
    Inserts the data before the data of the object.
    @param data Data to be prepended. Passing nil or empty data leaves the
                receiver unchanged.
 */
- (void)prepend:(NSData*)data {

    NSUInteger prefixLength = [data length];
    if (prefixLength == 0) {
        return;
    }

    // Prepending the receiver to itself: take a snapshot first, because the
    // in-place insert below would otherwise read from the buffer it resizes.
    if (data == self) {
        data = [NSData dataWithData:self];
    }

    // Replacing the empty range at index 0 inserts the bytes at the front and
    // shifts the existing contents, without building a temporary copy of both.
    [self replaceBytesInRange:NSMakeRange(0, 0) withBytes:[data bytes] length:prefixLength];
}

@end

Это сработало для меня на Swift 5.

https://gist.github.com/sooop/a2b110f8eebdf904d0664ed171bcd7a2

Другие вопросы по тегам