Source code for potpyri.scripts.archives.download_gemini_data

import bz2
import copy
import glob
import os
import posixpath
import shutil
import sys
import tarfile

import numpy as np
import requests
from astropy import units as u
from astropy.time import Time, TimeDelta


[docs]
def add_options(parser=None, usage=None):
    """
    Register this script's command-line options and parse ``sys.argv``.


    Inputs
        parser : existing argparse.ArgumentParser to extend.  A new parser is
            created when this is None.
        usage : usage string for a newly created parser (not used when a
            parser is supplied)

    Outputs
        options : argparse.Namespace with the attributes progids, cookie_file,
            date, clobber and outdir

    """
    import argparse

    # Identity comparison: ``== None`` would call a custom __eq__ if the
    # supplied parser object defined one.
    if parser is None:
        parser = argparse.ArgumentParser(usage=usage,
            conflict_handler='resolve')

    # Basic arguments and options
    parser.add_argument('--progids', type=str,
        help='Comma-separated list of program IDs.')
    parser.add_argument('--cookie-file', '-cf', type=str, default=None,
        help='Path to cookie file for downloading from Gemini archive.')
    # nargs='+' means the parsed value is always a list of strings
    parser.add_argument('--date', nargs='+', type=str, default=[],
        help='Either a single date on which to download science data or a '
        'range of dates for downloading science data.')
    # NOTE(review): main() in this file does not forward this flag to
    # download_data, so it currently has no effect there.
    parser.add_argument('--clobber', default=False, action='store_true',
        help='Clobber files that already exist in output path instead of '
        'downloading them again.')
    parser.add_argument('--outdir', type=str, default='.',
        help='Output data to input directory.  Default is current directory.')

    options = parser.parse_args()

    return options



[docs]
def get_observation_data(progid, archive_url='https://archive.gemini.edu/',
    feature='jsonsummary', cookie=None):
    """
    Query the Gemini archive for the file summary of one program.

    The request URL is ``<archive_url>/<feature>/<progid>``.


    Inputs
        progid : program ID
        archive_url : base URL of the Gemini archive server
        feature : archive endpoint to query (default returns a JSON summary)
        cookie : cookies dict handed to ``requests`` (for example the value
            returned by load_cookie), or None for an anonymous request

    Outputs
        The decoded JSON body when the server answers with HTTP 200,
        otherwise an empty list.

    """

    # URLs always use forward slashes; os.path.join would insert the host
    # operating system's separator (a backslash on Windows).
    fullurl = posixpath.join(archive_url, feature, progid)

    # cookies=None is what requests uses when the argument is omitted, so a
    # single call covers both the authenticated and the anonymous case.
    r = requests.get(fullurl, cookies=cookie)

    if r.status_code != 200:
        return []

    return r.json()



[docs]
def load_cookie(cfile):
    """
    Read the Gemini archive session cookie from a local text file.


    Inputs
        cfile : path to a file whose first line holds the session cookie
            value.  May be None or empty when no cookie file was supplied.

    Outputs
        dict mapping 'gemini_archive_session' to the cookie value, or None
        when no cookie file was given or the file does not exist

    """

    # No cookie file requested.  os.path.exists(None) would raise TypeError,
    # so handle the "not supplied" case before touching the filesystem.
    if not cfile:
        return None

    if not os.path.exists(cfile):
        print ('not cookie path found')
        return None

    with open(cfile, 'r') as f:
        # Only the first line is used; strip the newline and any stray
        # surrounding whitespace so it is not sent as part of the cookie.
        cookie = f.readline().strip()

    return dict(gemini_archive_session=cookie)

    


[docs]
def get_full_outname(fileobj, makedirs=True, forcedir='', outdir=''):
    """
    Build the local path at which an archive file record should be stored.

    Records with observation type 'object' go into a ``science/``
    sub-directory, 'arc'/'flat'/'bias' records go into ``cals/``, and any
    other type is placed directly in the base directory.


    Inputs
        fileobj : archive record (dict) with at least the keys 'name' and
            'observation_type'
        makedirs : (bool) create the target directory if it does not exist
            (default = True)
        forcedir : base directory that overrides outdir when non-empty.
            Default = ''
        outdir : base output directory. Default = ''

    Outputs
        fullfilename : path made of the base directory, the type
            sub-directory and the normalized file name ending in '.fits'
    """
    basedir = forcedir if forcedir else outdir + '/'
    # A forcedir given without a trailing separator would otherwise be fused
    # with the sub-directory name (e.g. '/data' + 'science/').
    if not basedir.endswith('/'):
        basedir += '/'

    obstype = fileobj['observation_type'].lower()
    if obstype == 'object':
        basedir += 'science/'
    elif obstype in ('arc', 'flat', 'bias'):
        basedir += 'cals/'

    if makedirs and not os.path.exists(basedir):
        print(f'Making: {basedir}')
        # exist_ok guards against the directory appearing between the check
        # above and this call.
        os.makedirs(basedir, exist_ok=True)

    # Normalize the record name: drop the extension, a '_bias' tag and a
    # leading 'g', then re-append '.fits'.
    fileobj_name = fileobj['name'].replace('.fits', '')
    fileobj_name = fileobj_name.replace('_bias', '')
    if fileobj_name.startswith('g'):
        fileobj_name = fileobj_name[1:]

    return basedir + fileobj_name + '.fits'




[docs]
def mask_object_spectral_observation(data, date=None):
    """
    Filter an archive file list down to imaging science (OBJECT) records.

    Despite the function name, the records kept are those whose mode is
    'imaging' and whose observation type is 'object' (case-insensitive).
    Records lacking a 'mode' or 'observation_type' key are skipped.


    Inputs
        data : list of archive records (dicts)
        date : optional list of date strings.  One entry keeps records from
            that time up to 24 hours later; two entries are used as the lower
            and upper bounds (both inclusive).  None or an empty list
            disables date filtering.
            NOTE(review): a bare end date presumably parses as 00:00 of that
            day, so frames taken later on the end date would be excluded -
            confirm this is intended.

    Outputs
        newlist :  list of the records that passed the filter, in input order

    Raises
        ValueError : if more than two dates are given

    """
    # Resolve the date window once instead of re-parsing it for every record.
    window = None
    if date:
        if len(date) == 1:
            t0 = Time(date[0])
            t1 = t0 + TimeDelta(86400.0 * u.s)
        elif len(date) == 2:
            t0 = Time(date[0])
            t1 = Time(date[1])
        else:
            # Previously this case left t0/t1 unbound inside the loop.
            raise ValueError('ERROR: dates should be 0, 1, or 2 arguments.')
        window = (t0, t1)

    newlist = []
    for fileobj in data:
        if 'mode' not in fileobj or 'observation_type' not in fileobj:
            continue

        # The keys may be present with a null value; treat that as "no match"
        # instead of failing on None.lower().
        mode = (fileobj['mode'] or '').lower()
        obstype = (fileobj['observation_type'] or '').lower()
        if mode != 'imaging' or obstype != 'object':
            continue

        # Check range of input dates (only for records that matched so far)
        if window is not None:
            stamp = fileobj.get('ut_datetime')
            if not stamp:
                # Cannot be placed inside the requested window
                continue
            obsdate = Time(stamp)
            if obsdate < window[0] or obsdate > window[1]:
                continue

        newlist.append(fileobj)

    return newlist


# Query gemini archive for calibration files associated with the input fileobj

[docs]
# Cache of per-date archive listings shared by all calls that do not pass
# their own ``caljson`` dict, so the same date is never requested twice.
_SHARED_CALJSON = {}


def get_associated_cals(fileobj, archive_url='https://archive.gemini.edu/',
    cookie=None, delta_days=(0.0, 0.0), cal_types=('BIAS', 'FLAT'),
    caljson=None):
    """
    Find bias and flat records in the archive that match a science record.

    For every day offset in delta_days the archive listing of that date is
    fetched (or taken from caljson) and scanned.  A calibration must match
    the science record's camera, detector ROI and binning; flats must also
    match its mode, focal plane mask and disperser.


    Inputs
        fileobj : archive record (dict) of the science file
        archive_url : base URL of the Gemini archive server
        cookie : cookies dict handed to ``requests``, or None
        delta_days : [first, last] day offsets relative to the observation
            date; both ends are searched
        cal_types : calibration types to collect ('BIAS' and/or 'FLAT')
        caljson : dict mapping a 'YYYYMMDD' date string to the archive
            listing already fetched for that date.  It is updated in place.
            When None, a cache shared across calls is used.

    Outputs
        cals :  list of matching calibration records
        caljson : the (possibly updated) cache dict

    Raises
        RuntimeError : if the archive does not answer a date query with
            HTTP 200
    """
    if caljson is None:
        caljson = _SHARED_CALJSON

    # Without a date and mode nothing can be matched.  Return the same
    # two-element shape as the normal path because callers unpack it.
    if 'ut_datetime' not in fileobj or 'mode' not in fileobj:
        return [], caljson

    def _norm(value):
        # Archive fields may be null; compare those as empty strings
        return (value or '').lower()

    # Need to match detector mode, ROI, binning, slitmask, disperser, camera
    mode = _norm(fileobj['mode'])
    roi = _norm(fileobj.get('detector_roi_setting'))
    binning = _norm(fileobj.get('detector_binning'))
    mask = _norm(fileobj.get('focal_plane_mask'))
    disperser = _norm(fileobj.get('disperser'))
    camera = _norm(fileobj.get('camera'))

    feature = 'jsonsummary/'
    cals = []
    obstime = Time(fileobj['ut_datetime'])

    for dd in np.arange(delta_days[0], delta_days[1]+1):
        t = obstime + TimeDelta(dd, format='jd')
        date = t.datetime.strftime('%Y%m%d')

        if date in caljson:
            data = caljson[date]
        else:
            # posixpath keeps forward slashes in the URL on every platform
            url = posixpath.join(archive_url, feature, date)
            print(f'Checking {url}')
            r = requests.get(url, cookies=cookie)

            if r.status_code != 200:
                raise RuntimeError('ERROR: could not get cal data from '
                    f'Gemini archive ({url} returned HTTP {r.status_code}).')

            data = r.json()
            caljson[date] = data

        for dat in data:
            # All calibration frames must match these conditions
            if not dat.get('camera'): continue
            if _norm(dat['camera']) != camera: continue
            if _norm(dat.get('detector_roi_setting')) != roi: continue
            if _norm(dat.get('detector_binning')) != binning: continue

            obstype = dat.get('observation_type')

            # Get bias frames (no further matching needed)
            if obstype == 'BIAS':
                if 'BIAS' in cal_types:
                    cals.append(dat)
                continue

            if obstype != 'FLAT' or 'FLAT' not in cal_types:
                continue

            # Imager setup is important for FLAT
            if _norm(dat.get('mode')) != mode: continue
            if _norm(dat.get('focal_plane_mask')) != mask: continue
            if _norm(dat.get('disperser')) != disperser: continue

            # Get flat frames
            cals.append(dat)

    return cals, caljson




[docs]
def unpack_tarfile(outtarname):
    """
    Unpack a downloaded tar file in place and remove the tar file after.

    The archive is extracted into the directory that contains it, the
    'md5sums.txt' and 'README.txt' members are deleted, every '*.bz2' file
    in that directory is decompressed (the compressed copy is removed), and
    finally the tar file itself is deleted.


    Inputs
        outtarname : path to the tar file

    """
    # A bare file name has an empty dirname; use the current directory so
    # the glob below does not end up anchored at the filesystem root.
    basedir = os.path.dirname(outtarname) or '.'

    # The context manager closes the archive even if extraction fails
    with tarfile.open(outtarname, 'r') as tar:
        if hasattr(tarfile, 'data_filter'):
            # Refuse members that would be written outside basedir
            tar.extractall(basedir, filter='data')
        else:
            # Older Python without extraction filters
            tar.extractall(basedir)

    for extra in ('md5sums.txt', 'README.txt'):
        extra_path = os.path.join(basedir, extra)
        if os.path.exists(extra_path):
            os.remove(extra_path)

    # Decompress all bz2 files with the standard library rather than a shell
    # command, which broke on paths containing spaces or shell characters.
    pattern = os.path.join(glob.escape(basedir), '*.bz2')
    for packed in glob.glob(pattern):
        unpacked = packed[:-len('.bz2')]
        try:
            with bz2.open(packed, 'rb') as src, open(unpacked, 'wb') as dst:
                shutil.copyfileobj(src, dst)
        except (OSError, EOFError) as err:
            # Keep going on a corrupt member and leave the .bz2 in place,
            # but do not leave a truncated output file behind.
            print(f'WARNING: could not decompress {packed}: {err}')
            if os.path.exists(unpacked):
                os.remove(unpacked)
            continue
        os.remove(packed)

    # Clean up tar file
    os.remove(outtarname)



[docs]
def download_file(fileobj, outfilename, archive_url='https://archive.gemini.edu/',
    cookie=None, symlink=''):

    """
    Download one file from the archive unless it already exists locally.

    The file is fetched as a tar archive into the directory of outfilename,
    unpacked with unpack_tarfile, and optionally linked from a second
    location.  Progress is written to stdout.


    Inputs
        fileobj : archive record (dict) with a 'name' key
        outfilename : full local path expected to exist after unpacking
        archive_url : base URL of the Gemini archive server
        cookie : cookies dict handed to ``requests``, or None
        symlink : optional path at which to create a symbolic link to the
            downloaded file

    Outputs
        Bool : True if outfilename exists (already present or downloaded
            successfully), False otherwise
    """

    # Color strings for download messages
    green = '\033[1;32;40m'
    red = '\033[1;31;40m'
    end = '\033[0;0m'

    if os.path.exists(outfilename):
        print(f'{outfilename} already exists.  Skipping download.')
        return True

    # Reduce the record name to the bare frame name used in the download URL
    fileobj_name = fileobj['name'].replace('.fits', '')
    fileobj_name = fileobj_name.replace('_bias', '')
    if fileobj_name.startswith('g'):
        fileobj_name = fileobj_name[1:]

    # posixpath keeps forward slashes in the URL on every platform
    url = posixpath.join(archive_url, 'download', 'Filename', fileobj_name)

    # Written without a newline so the final status can overwrite the line
    message = f'Downloading: {outfilename}'
    sys.stdout.write(message)
    sys.stdout.flush()

    outtarname = None
    # The context manager releases the streamed connection when done
    with requests.get(url, stream=True, cookies=cookie) as r:
        if r.status_code == 200:
            basedir = os.path.dirname(outfilename) or '.'
            outtarname = os.path.join(basedir,
                fileobj['name'].split('.')[0] + '.tar')

            with open(outtarname, 'wb') as tarball:
                for chunk in r.iter_content(chunk_size=1024 * 1024):
                    tarball.write(chunk)

    if outtarname is not None:
        unpack_tarfile(outtarname)

    success = outtarname is not None and os.path.exists(outfilename)

    # lexists also catches a dangling link left by an earlier run, which
    # would make os.symlink raise FileExistsError.
    if success and symlink and not os.path.lexists(symlink):
        symlinkdir = os.path.dirname(symlink)
        if symlinkdir and not os.path.exists(symlinkdir):
            print(f'\nMaking directory: {symlinkdir}')
            os.makedirs(symlinkdir)
        # A relative target is resolved relative to the link's directory,
        # not the working directory, so link to the absolute path.
        os.symlink(os.path.abspath(outfilename), symlink)

    if success:
        status = green+' [SUCCESS]'+end
    else:
        status = red+' [FAILURE]'+end
    sys.stdout.write('\r' + message + status + '\n')

    return success



[docs]
def _workspace_link(fullfilename, subdir):
    """Return the link path for a file: moved up out of ``subdir`` and with
    'rawdata/' replaced by 'workspace/'."""
    directory, filename = os.path.split(fullfilename)
    # Only strip the final path component, so a 'science' or 'cals' that is
    # part of the user's output directory is left alone.
    if os.path.basename(directory) == subdir:
        directory = os.path.dirname(directory)
    return os.path.join(directory, filename).replace('rawdata/', 'workspace/')


def download_data(progid, date, cookie_file, directory):
    """
    Download imaging science frames and matching calibrations from the
    Gemini archive.

    For each program, the file summary is fetched and filtered with
    mask_object_spectral_observation (which keeps imaging OBJECT records).
    Every science file not yet on disk is downloaded, then matching bias
    and flat frames are looked up and downloaded as well.


    Inputs
        progid : comma-separated string of program IDs
        date : date filter: None, a single date string, or a list of one or
            two date strings (as produced by the --date option)
        cookie_file : path to the cookie file, or None for anonymous access
        directory : path to the directory to store the data

    Raises
        ValueError : if no program ID is given or more than two dates are
            supplied


    """

    if not progid:
        raise ValueError('ERROR: at least one program ID is required.  '
            'See download_gemini_data.py -h.')
    programs = [p.strip() for p in progid.split(',') if p.strip()]
    if not programs:
        raise ValueError('ERROR: at least one program ID is required.  '
            'See download_gemini_data.py -h.')

    # The command line supplies a list (nargs='+'); wrapping it in another
    # list would hand a nested list to the date filter.
    if date is None:
        dates = []
    elif isinstance(date, str):
        dates = [date]
    else:
        dates = list(date)

    if len(dates) > 2:
        raise ValueError('ERROR: dates should be 0, 1, or 2 arguments.  '
            'See download_gemini_data.py -h.')

    # Only try to read a cookie when a file was actually specified
    cookie = load_cookie(cookie_file) if cookie_file else None

    outdir = directory
    os.makedirs(outdir, exist_ok=True)

    # Per-date archive listings, reused across all files and programs
    caljson = {}
    for program in programs:
        # get all data
        data = get_observation_data(program, cookie=cookie)

        # filter for imaging science (OBJECT) data in the date range
        data = mask_object_spectral_observation(data, date=dates)

        for fileobj in data:
            fullfilename = get_full_outname(fileobj, outdir=outdir)

            # if the file exists, skip it
            if os.path.exists(fullfilename):
                print(f'WARNING: {fullfilename} exists.  Continuing...')
                continue

            symlinkname = _workspace_link(fullfilename, 'science')
            # try downloading the file; the status is reported on stdout
            download_file(fileobj, fullfilename, cookie=cookie,
                symlink=symlinkname)

            print('Checking for cals...')
            # check for associated calibration files on the observation date
            cals, caljson = get_associated_cals(fileobj, cookie=cookie,
                caljson=caljson)
            nbias = len([c for c in cals if c['observation_type']=='BIAS'])
            nflat = len([c for c in cals if c['observation_type']=='FLAT'])

            # Widen the search to +/-30 days for any type with fewer than
            # 5 frames
            cal_types = []
            if nbias < 5: cal_types.append('BIAS')
            if nflat < 5: cal_types.append('FLAT')
            if cal_types:
                print('Checking for additional cals...')
                # Pass the cache along so dates already fetched are reused
                add_cals, caljson = get_associated_cals(fileobj,
                    cookie=cookie, delta_days=[-30,30], cal_types=cal_types,
                    caljson=caljson)
                cals.extend(add_cals)

            # Get unique cals (first occurrence wins, order preserved)
            seen = set()
            unique_cals = []
            for c in cals:
                if c['name'] not in seen:
                    seen.add(c['name'])
                    unique_cals.append(c)
            cals = unique_cals

            ncals = len(cals)
            nbias = len([c for c in cals if c['observation_type']=='BIAS'])
            nflat = len([c for c in cals if c['observation_type']=='FLAT'])

            m = f'Grabbing {ncals} calibration frames: '
            m += f'{nbias} bias, {nflat} flats'
            print(m)

            for cal in cals:
                # Calibrations share the science data's base directory
                calfilename = get_full_outname(cal, outdir=outdir)
                symlinkname = _workspace_link(calfilename, 'cals')
                download_file(cal, calfilename, cookie=cookie,
                    symlink=symlinkname)



[docs]
def main():
    """
    Command-line entry point: parse the options and start the download.

    Exits with an error message when no program ID was supplied.
    """
    args = add_options()

    # --progids has no default; without it download_data has nothing to
    # query, so stop here with a readable message.
    if not args.progids:
        raise SystemExit('ERROR: --progids is required.  '
            'See download_gemini_data.py -h.')

    download_data(args.progids, args.date, args.cookie_file, args.outdir)


if __name__=="__main__":
    main()