#!/usr/bin/env python3
# Roger Volden

import sys

def oldReader(inFile):
    '''Read a FASTA file into two flat lists.

    inFile: path of the FASTA file to read.

    Returns a tuple (headers, sequences):
        headers   - every line that starts with '>', minus the '>'
        sequences - every other non-blank line, one entry per line

    Note that each sequence line becomes its own list entry, so the
    two lists only line up index-for-index when every record keeps
    its sequence on a single line.
    '''
    headers, sequences = [], []
    # The context manager closes the file handle even if reading fails.
    with open(inFile) as fastaFile:
        for line in fastaFile:
            line = line.rstrip()
            if not line:
                # skip blank (or whitespace-only) lines
                continue
            if line.startswith('>'):
                headers.append(line[1:])
            else:
                sequences.append(line)
    return headers, sequences

def readFasta(inFile):
    '''Take a filename and return a read dictionary
    readDict = {header:sequence, ...}

    inFile: path of the FASTA file to read.

    Headers are stored without the leading '>'. Sequences that span
    several lines are concatenated into one string; blank lines are
    ignored. If a header occurs more than once, the sequence of its
    last occurrence is kept.

    Raises ValueError if sequence data appears before the first header.
    '''
    readDict = {}
    lastHead = None  # None until the first header line has been seen
    chunks = []      # sequence lines collected for the current header
    # The context manager closes the file handle even if parsing fails.
    with open(inFile) as fastaFile:
        for line in fastaFile:
            line = line.rstrip()
            if not line:
                continue
            if line.startswith('>'):
                # finish the previous record before starting a new one
                if lastHead is not None:
                    readDict[lastHead] = ''.join(chunks)
                lastHead = line[1:]
                chunks = []
                # reserve the key now so an empty record still appears
                # and a repeated header starts over from scratch
                readDict[lastHead] = ''
            elif lastHead is None:
                raise ValueError(
                    'sequence data found before the first header in %r'
                    % (inFile,))
            else:
                # collect pieces and join once; cheaper than repeated +=
                chunks.append(line)
    # covers the last record read
    if lastHead is not None:
        readDict[lastHead] = ''.join(chunks)
    return readDict

def readGenome(inFile):
    '''Take a filename and return a dictionary
    readDict = {header:SEQUENCE, ...} with upper-cased sequences.

    inFile: path of the FASTA file to read.

    Joining a list of strings is faster than repeatedly
    concatenating strings. This is especially apparent
    when reading in a genome, so the lines of each record are
    collected in a list and joined once the record is complete.

    Headers are stored without the leading '>'. Blank lines are
    ignored. An empty file yields an empty dictionary.

    Raises ValueError if sequence data appears before the first header.
    '''
    readDict = {}
    lastHead = None  # None until the first header line has been seen
    # The context manager closes the file handle even if parsing fails.
    with open(inFile) as genomeFile:
        for line in genomeFile:
            line = line.rstrip()
            if not line:
                continue
            if line.startswith('>'):
                # a previous record exists: turn its list of
                # sequence lines into a single string
                if lastHead is not None:
                    readDict[lastHead] = ''.join(readDict[lastHead])
                # header:empty list to put sequences into
                lastHead = line[1:]
                readDict[lastHead] = []
            elif lastHead is None:
                raise ValueError(
                    'sequence data found before the first header in %r'
                    % (inFile,))
            else:
                # append to that list if you aren't on a header line
                readDict[lastHead].append(line.upper())
    # covers the last sequence read; skipped when no header was seen
    # so that an empty file returns {} instead of crashing
    if lastHead is not None:
        readDict[lastHead] = ''.join(readDict[lastHead])
    return readDict

def main():
    '''Print the command-line arguments, parse the FASTA file named
    by the first argument with readGenome and print the result.

    Exits with a usage message when no file argument was given.
    '''
    print(sys.argv)
    if len(sys.argv) < 2:
        # fail with a readable message instead of a bare IndexError
        prog = sys.argv[0] if sys.argv else '<script>'
        sys.exit('usage: %s <fasta file>' % prog)
    reads = readGenome(sys.argv[1])
    print(reads)
# Alternative debugging snippets, kept for reference:
#    reads = readFasta(sys.argv[1])
#    print(reads['chrI'][100000:100100])
#    for h, s in reads.items():
#        print(h, len(s))

# Only run the driver when executed as a script, so the reader
# functions can be imported without side effects.
if __name__ == '__main__':
    main()