Source code for hiresprv.database

"""
Access and query a user's observation database.
"""

import logging
import json
import urllib.parse

import requests
import http.cookiejar

import pandas as pd


[docs]class Database(object):
    """
    Each workspace in HIRES PRV pipeline server contains a database
    listing all the data retrieved from KOA for the user.

    The ``hiresprv.status.Database`` class provides methods for querying that
    database.  This information is primarily used to plan reduction
    and analysis processing.

    The initialization of database class loads the cookie file saved
    from HIRES PRV pipelone login, parse the cookie to look up the
    users workspace.

    Args:
        cookiepath (string): full path to a cookie file saved by :func:`hiresprv.auth.login()`

    """

    cookiepath = ''
    userid = ''
    workspace = ''
    cookiestr = ''

    target = ''
    cmd = ''
   
    sql = ''
    format = 'html'
    filepath = ''
    disp = 1 
    
    content_type = ''

    debug = 0
    debugfile = '' 

    status = ''
    msg = ''
    
    def __init__(self, cookiepath, **kwargs):
        self.cookiepath = cookiepath

        if len(self.cookiepath) == 0:
            
            self.status = 'error'
            self.msg = 'Failed to find required parameter: cookiepath.'
            
            retval = dict()
            retval['status'] = self.status
            retval['msg'] = self.msg

            # TODO: Not good practice to put a return statement in an __init__ statement.
            # Probably should raise an exception instead
            return retval

        if len(kwargs) > 0:
       
            if 'debugfile' in kwargs.keys():
                self.debugfile = kwargs['debugfile']
         
        if len(self.debugfile) > 0:
            
            self.debug = 1
            logging.basicConfig(filename=self.debugfile, level=logging.DEBUG)

            # TODO: If we just want to open this file I don't think we need the with statement.
            with open(self.debugfile, 'w') as fdebug:
                pass
         
        if self.debug:
            print('Enter database.init:')
            print('cookiepath= %s' % self.cookiepath)

            logging.debug('')
            logging.debug('Enter database.init:')
            logging.debug('cookiepath= %s' % self.cookiepath)
   
        self.cookiejar = http.cookiejar.MozillaCookieJar(self.cookiepath)

        # TODO: I think we can remove some of these redundant checks for ``len(self.cookiepath) > 0``
        if len(self.cookiepath) > 0:
   
            try: 
                self.cookiejar.load(ignore_discard=True, ignore_expires=True)
    
                if self.debug:
                    logging.debug('cookie loaded from %s' % self.cookiepath)
        
                for cookie in self.cookiejar:
                    
                    if self.debug:
                        logging.debug('cookie= %s' % cookie)
                        logging.debug('cookie.name= %s' % cookie.name)
                        logging.debug('cookie.value= %s' % cookie.value)
                        logging.debug('cookie.domain= %s' % cookie.domain)
                    
                    if cookie.name == 'HIPRV':
                        self.cookiestr = cookie.value

            # TODO: bare except
            except:
                pass

                if self.debug:
                    logging.debug('loadCookie exception')
 
        if self.debug:
            logging.debug('cookiestr= %s' % self.cookiestr)
       
        if len(self.cookiestr) > 0:
            arr = self.cookiestr.split('|')
            narr = len(arr)
        
        if self.debug:
            logging.debug('narr= [%d]' % narr)
            for i in range(0, narr):
                logging.debug('arr[%d]= [%s]' % (i, arr[i]))
        
        if narr == 3:
            self.userid = arr[0]
            self.workspace = arr[2]

        if self.debug:
            logging.debug('userid= %s workspace= %s' % (self.userid, self.workspace))
       
        self.url = 'http://hiresprv.ipac.caltech.edu/cgi-bin/idlDriver/nph-prvState?workspace=' + self.workspace

        return

[docs]    def search(self, **kwargs):
        """
        The search method is the most general mechanism for querying 
        a workspace database.  The user has the freedom to provide a
        general SQL SELECT statement (specifying both the database columns 
        to be retrieved and constraints on the records returned).

        When the sql is blank (i.e.: ''), the entire database will be 
        returned.

        The user can also select an output format (html, csv or IPAC ASCII).
        
        The output will be saved to a disk file and/or displayed in a browser
        depending on the format:
       
        'cvs' format: save to disk,
        'html' format: save to disk and display in browser,
        'IPAC' ASCII format: save to disk and display in browser.
        
        Args:
            sql (string):    (optional) fully qualified sql statement
                             if empty string or parameter not set, the whole database table will be returned.
            format (string): (optional) string specifying the output format ('html'|'csv'|'ipac'); the default is html
            filepath (string): (optional) full path where the file will be saved; if not provided,
                               a URL string to an HTML view of the table.

        Returns:
            string: URL to HTML table if filepath is not specified
        """
        
        if self.debug:
            logging.debug('')
            logging.debug('Enter database.search:')
       
        self.sql = ''
        if (len(kwargs) > 0) and ('sql' in kwargs.keys()):
            self.sql = urllib.parse.quote(kwargs['sql'])

        self.filepath = ''
        if (len(kwargs) > 0) and ('filepath' in kwargs.keys()):
            self.filepath = kwargs['filepath']

        self.format = 'html'
        if (len(kwargs) > 0) and ('format' in kwargs.keys()):
            self.format = kwargs['format']
        
        if len(self.filepath) > 0:
            self.disp = 0
        else:
            self.disp = 1
 
        if self.debug:
            logging.debug('')
            logging.debug('sql= %s' % self.sql)
            logging.debug('format= %s' % self.format)
            logging.debug('filepath= %s' % self.filepath)
            logging.debug('disp= %d' % self.disp)
        
        url = self.url 
        if len(self.filepath) > 0:
            url = url + '&filepath=' + self.filepath

        if len(self.format) > 0:
            url = url + '&format=' + self.format
            
        if len(self.sql) > 0:
            url = url + '&sql=' + self.sql

        if self.debug:
            logging.debug('')
            logging.debug('url= [%s]' % url)

        if self.disp == 1:
            
            # try:
            #     webbrowser.open (url)
            # except:
            #     pass

            return url

        self.__submit_request(url)
        
        if self.debug:
            logging.debug('')
            logging.debug('returned submit_request:')
            logging.debug('self.status= [%s]' % self.status)
            logging.debug('self.msg= [%s]' % self.msg)

#
#    save to file is specified
#
        if self.status == 'ok':

            self.__save_to_file(self.filepath)
        
            if self.debug:
                logging.debug('')
                logging.debug('returned save_to_file:')
                logging.debug('self.status= [%s]' % self.status)
                logging.debug('self.msg= [%s]' % self.msg)
       
        retval = dict()
        retval['status'] = self.status
        retval['msg'] = self.msg

        return retval

[docs]    def target_list(self, **kwargs):
        """
        Returns a list of all unique targets currently in
        the database.
        
        Args:
            None
        Returns:
            string: URL to HTML table if filepath is not specified
        """
        
        if self.debug:
            logging.debug('')
            logging.debug('Enter database.target_list:')
        
        self.filepath = ''
        if (len(kwargs) > 0) and ('filepath' in kwargs.keys()):
            self.filepath = kwargs['filepath']

        self.format = 'html'
        if (len(kwargs) > 0) and ('format' in kwargs.keys()):
            self.format = kwargs['format']
            
        if len(self.filepath) > 0:
            self.disp = 0 
        else:
            self.disp = 1
  
        self.sql = urllib.parse.quote('select distinct TARGET from FILES;'.replace(' ', '%20'))

        url = self.url + '&format=' + self.format + '&sql=' + self.sql

        if self.debug:
            logging.debug('')
            logging.debug('sql= %s' % self.sql)
            logging.debug('filepath= %s' % self.filepath)
            logging.debug('format= %s' % self.format)
            logging.debug('disp= %d' % self.disp)
            logging.debug('url= [%s]' % url)

        if self.disp == 1:
            
            # webbrowser.open (url)
            return url
          
        self.__submit_request(url)
        
        if self.debug:
            logging.debug('')
            logging.debug('returned submit_request:')
            logging.debug('self.status= [%s]' % self.status)
            logging.debug('self.msg= [%s]' % self.msg)

#
#    save to file is specified
#
        if self.status == 'ok':

            self.__save_to_file(self.filepath)
        
            if self.debug:
                logging.debug('')
                logging.debug('returned save_to_file:')
                logging.debug('self.status= [%s]' % self.status)
                logging.debug('self.msg= [%s]' % self.msg)
       
        retval = dict()
        retval['status'] = self.status
        retval['msg'] = self.msg

        return retval

[docs]    def target_info(self, target, **kwargs):
        """
        This method retrieves the database records for all data pertaining to a specific
        target.
        
        Args:
            target (string): target name
            format (string): (optional) string specifying the output format ('html'|'csv'|'ipac'); the default is html
            filepath (string): (optional) full path where the file will be saved; if not provided,
                               a URL string to an HTML view of the table.

        Returns:
            string: URL to HTML table if filepath is not specified
        """
       
        if self.debug:
            logging.debug('')
            logging.debug('Enter database.target_info: target= %s' % target)

        self.filepath = ''
        if (len(kwargs) > 0) and ('filepath' in kwargs.keys()):
            self.filepath = kwargs['filepath']

        self.format = 'html'
        if (len(kwargs) > 0) and ('format' in kwargs.keys()):
            self.format = kwargs['format']
            
        if len(self.filepath) > 0:
            self.disp = 0 
        else:
            self.disp = 1
        
        self.sql = urllib.parse.quote("select * from FILES where target like '%" + target + "%';")

        url = self.url + '&format=' + self.format + '&sql=' + self.sql

        if self.debug:
            logging.debug('')
            logging.debug('sql= %s' % self.sql)
            logging.debug('filepath= %s' % self.filepath)
            logging.debug('format= %s' % self.format)
            logging.debug('disp= %d' % self.disp)
            logging.debug('url= [%s]' % url)

        if self.disp == 1:
            
            # webbrowser.open (url)
            return url

        self.__submit_request(url)
        
        if self.debug:
            logging.debug('')
            logging.debug('returned submit_request:')
            logging.debug('self.status= [%s]' % self.status)
            logging.debug('self.msg= [%s]' % self.msg)

#
#    save to file is specified
#
        if self.status == 'ok':

            self.__save_to_file(self.filepath)
        
            if self.debug:
                logging.debug('')
                logging.debug('returned save_to_file:')
                logging.debug('self.status= [%s]' % self.status)
                logging.debug('self.msg= [%s]' % self.msg)
       
        retval = dict()
        retval['status'] = self.status
        retval['msg'] = self.msg

        return retval

[docs]    def sqlite(self, filepath):
        """
        Downloads the sqlite database file
        
        Args:
            filepath (string): path and filename to save the database file
                               on the disk.

        """
        
        self.filepath = filepath
        self.cmd = 'sqlite'

        if self.debug:
            logging.debug('')
            logging.debug('Enter database.sqlite: filepath = %s' % self.filepath)
       
        if len(self.filepath) == 0:
            
            self.status = 'error'
            self.msg = 'Input argument filepath is required.'
            
            retval = dict()
            retval['status'] = self.status
            retval['msg'] = self.msg

            return retval

        url = self.url + '&cmd=' + self.cmd

        if self.debug:
            logging.debug('')
            logging.debug('sqlite= %s' % self.sqlite)
            logging.debug('self.url= [%s]' % self.url)

        self.__submit_request(url)
        
        if self.debug:
            logging.debug('')
            logging.debug('returned submit_request:')
            logging.debug('self.status= [%s]' % self.status)
            logging.debug('self.msg= [%s]' % self.msg)

        if self.status == 'ok':

            if self.debug:
                logging.debug('')
            
            self.__save_to_file(self.filepath)
        
            if self.debug:
                logging.debug('')
                logging.debug('returned save_to_file:')
                logging.debug('self.status= [%s]' % self.status)
                logging.debug('self.msg= [%s]' % self.msg)
        
        retval = dict()
        retval['status'] = self.status
        retval['msg'] = self.msg

        return retval

[docs]    def rv_observations(self, target):
        """
        Return metadata for all RV observations associated with a given target

        Args:
            target (string): target name


        """

        url = self.search(sql="SELECT * from FILES WHERE TARGET like '{}' AND OBTYPE like 'RV observation'".format(target))

        return url

[docs]    @staticmethod
    def to_pandas(url):
        """
        Convert URL for HTML table into a Pandas DataFrame

        Args:
            url (string): database search url (produced by Database methods like `search` or `target_info`)

        Returns:
            DataFrame
        """

        assert "format=html" in url, "Must request table in HTML format"

        try:
            return pd.read_html(url, header=0)[0]
        except ImportError:  # html5lib error raised if empty result
            return pd.DataFrame([])

    def __submit_request(self, url):

        if self.debug:
            logging.debug('')
            logging.debug('Enter database.__submit_request:')
            logging.debug('sql= %s' % self.sql)
            logging.debug('sqlite= %s' % self.sqlite)
            logging.debug('format= %s' % self.format)
            logging.debug('url= [%s]' % url)
        
        try:
            self.response = requests.get(url, stream=True)

            if self.debug:
                logging.debug('')
                logging.debug('request sent')
        
        except Exception as e:
            
            if self.debug:
                logging.debug('')
                logging.debug('exception: e= %s' % str(e))

            self.status = 'error'
            self.msg = 'Failed to submit the request: ' + str(e)
            return
        
        if self.debug:
            logging.debug('')
            logging.debug('status_code:')
            logging.debug(self.response.status_code)

        if self.response.status_code == 200:
            self.status = 'ok'
            self.msg = ''
        else:
            self.status = 'error'
            self.msg = 'Failed to submit the request'
            
        if self.debug:
            logging.debug('')
            logging.debug('headers: ')
            logging.debug(self.response.headers)

        self.content_type = self.response.headers['Content-type']
       
        if self.content_type == 'json':
            
            if self.debug:
                logging.debug('')
                logging.debug('return is a json structure: error message')
            
            jsondata = json.loads(self.response.text)
            
            self.status = jsondata['status']
            self.msg = jsondata['msg']

        return

    def __save_to_file(self, filepath):

        if self.debug:
            logging.debug('')
            logging.debug('Enter database.__save_to_file:')
            logging.debug('filepath= %s' % filepath)
       
        try:
            with open(filepath, 'wb') as fd:

                for chunk in self.response.iter_content(chunk_size=512):
                    fd.write(chunk)
            
        except NameError as e:

            if self.debug:
                logging.debug('')
                logging.debug('exception: e= %s' % e)

            self.status = 'error'
            self.msg = 'Failed to save returned data to file: %s' % filepath
            return

        self.status = 'ok'
        self.msg = ''
        return