Source code for hiresprv.database

"""
Access and query a user's observation database.
"""

import logging
import json
import urllib.parse

import requests
import http.cookiejar

import pandas as pd


[docs]class Database(object): """ Each workspace in HIRES PRV pipeline server contains a database listing all the data retrieved from KOA for the user. The ``hiresprv.status.Database`` class provides methods for querying that database. This information is primarily used to plan reduction and analysis processing. The initialization of database class loads the cookie file saved from HIRES PRV pipelone login, parse the cookie to look up the users workspace. Args: cookiepath (string): full path to a cookie file saved by :func:`hiresprv.auth.login()` """ cookiepath = '' userid = '' workspace = '' cookiestr = '' target = '' cmd = '' sql = '' format = 'html' filepath = '' disp = 1 content_type = '' debug = 0 debugfile = '' status = '' msg = '' def __init__(self, cookiepath, **kwargs): self.cookiepath = cookiepath if len(self.cookiepath) == 0: self.status = 'error' self.msg = 'Failed to find required parameter: cookiepath.' retval = dict() retval['status'] = self.status retval['msg'] = self.msg # TODO: Not good practice to put a return statement in an __init__ statement. # Probably should raise an exception instead return retval if len(kwargs) > 0: if 'debugfile' in kwargs.keys(): self.debugfile = kwargs['debugfile'] if len(self.debugfile) > 0: self.debug = 1 logging.basicConfig(filename=self.debugfile, level=logging.DEBUG) # TODO: If we just want to open this file I don't think we need the with statement. with open(self.debugfile, 'w') as fdebug: pass if self.debug: print('Enter database.init:') print('cookiepath= %s' % self.cookiepath) logging.debug('') logging.debug('Enter database.init:') logging.debug('cookiepath= %s' % self.cookiepath) self.cookiejar = http.cookiejar.MozillaCookieJar(self.cookiepath) # TODO: I think we can remove some of these redundant checks for ``len(self.cookiepath) > 0`` if len(self.cookiepath) > 0: try: self.cookiejar.load(ignore_discard=True, ignore_expires=True) if self.debug: logging.debug('cookie loaded from %s' % self.cookiepath) for cookie in self.cookiejar: if self.debug: logging.debug('cookie= %s' % cookie) logging.debug('cookie.name= %s' % cookie.name) logging.debug('cookie.value= %s' % cookie.value) logging.debug('cookie.domain= %s' % cookie.domain) if cookie.name == 'HIPRV': self.cookiestr = cookie.value # TODO: bare except except: pass if self.debug: logging.debug('loadCookie exception') if self.debug: logging.debug('cookiestr= %s' % self.cookiestr) if len(self.cookiestr) > 0: arr = self.cookiestr.split('|') narr = len(arr) if self.debug: logging.debug('narr= [%d]' % narr) for i in range(0, narr): logging.debug('arr[%d]= [%s]' % (i, arr[i])) if narr == 3: self.userid = arr[0] self.workspace = arr[2] if self.debug: logging.debug('userid= %s workspace= %s' % (self.userid, self.workspace)) self.url = 'http://hiresprv.ipac.caltech.edu/cgi-bin/idlDriver/nph-prvState?workspace=' + self.workspace return
[docs] def search(self, **kwargs): """ The search method is the most general mechanism for querying a workspace database. The user has the freedom to provide a general SQL SELECT statement (specifying both the database columns to be retrieved and constraints on the records returned). When the sql is blank (i.e.: ''), the entire database will be returned. The user can also select an output format (html, csv or IPAC ASCII). The output will be saved to a disk file and/or displayed in a browser depending on the format: 'cvs' format: save to disk, 'html' format: save to disk and display in browser, 'IPAC' ASCII format: save to disk and display in browser. Args: sql (string): (optional) fully qualified sql statement if empty string or parameter not set, the whole database table will be returned. format (string): (optional) string specifying the output format ('html'|'csv'|'ipac'); the default is html filepath (string): (optional) full path where the file will be saved; if not provided, a URL string to an HTML view of the table. Returns: string: URL to HTML table if filepath is not specified """ if self.debug: logging.debug('') logging.debug('Enter database.search:') self.sql = '' if (len(kwargs) > 0) and ('sql' in kwargs.keys()): self.sql = urllib.parse.quote(kwargs['sql']) self.filepath = '' if (len(kwargs) > 0) and ('filepath' in kwargs.keys()): self.filepath = kwargs['filepath'] self.format = 'html' if (len(kwargs) > 0) and ('format' in kwargs.keys()): self.format = kwargs['format'] if len(self.filepath) > 0: self.disp = 0 else: self.disp = 1 if self.debug: logging.debug('') logging.debug('sql= %s' % self.sql) logging.debug('format= %s' % self.format) logging.debug('filepath= %s' % self.filepath) logging.debug('disp= %d' % self.disp) url = self.url if len(self.filepath) > 0: url = url + '&filepath=' + self.filepath if len(self.format) > 0: url = url + '&format=' + self.format if len(self.sql) > 0: url = url + '&sql=' + self.sql if self.debug: logging.debug('') logging.debug('url= [%s]' % url) if self.disp == 1: # try: # webbrowser.open (url) # except: # pass return url self.__submit_request(url) if self.debug: logging.debug('') logging.debug('returned submit_request:') logging.debug('self.status= [%s]' % self.status) logging.debug('self.msg= [%s]' % self.msg) # # save to file is specified # if self.status == 'ok': self.__save_to_file(self.filepath) if self.debug: logging.debug('') logging.debug('returned save_to_file:') logging.debug('self.status= [%s]' % self.status) logging.debug('self.msg= [%s]' % self.msg) retval = dict() retval['status'] = self.status retval['msg'] = self.msg return retval
[docs] def target_list(self, **kwargs): """ Returns a list of all unique targets currently in the database. Args: None Returns: string: URL to HTML table if filepath is not specified """ if self.debug: logging.debug('') logging.debug('Enter database.target_list:') self.filepath = '' if (len(kwargs) > 0) and ('filepath' in kwargs.keys()): self.filepath = kwargs['filepath'] self.format = 'html' if (len(kwargs) > 0) and ('format' in kwargs.keys()): self.format = kwargs['format'] if len(self.filepath) > 0: self.disp = 0 else: self.disp = 1 self.sql = urllib.parse.quote('select distinct TARGET from FILES;'.replace(' ', '%20')) url = self.url + '&format=' + self.format + '&sql=' + self.sql if self.debug: logging.debug('') logging.debug('sql= %s' % self.sql) logging.debug('filepath= %s' % self.filepath) logging.debug('format= %s' % self.format) logging.debug('disp= %d' % self.disp) logging.debug('url= [%s]' % url) if self.disp == 1: # webbrowser.open (url) return url self.__submit_request(url) if self.debug: logging.debug('') logging.debug('returned submit_request:') logging.debug('self.status= [%s]' % self.status) logging.debug('self.msg= [%s]' % self.msg) # # save to file is specified # if self.status == 'ok': self.__save_to_file(self.filepath) if self.debug: logging.debug('') logging.debug('returned save_to_file:') logging.debug('self.status= [%s]' % self.status) logging.debug('self.msg= [%s]' % self.msg) retval = dict() retval['status'] = self.status retval['msg'] = self.msg return retval
[docs] def target_info(self, target, **kwargs): """ This method retrieves the database records for all data pertaining to a specific target. Args: target (string): target name format (string): (optional) string specifying the output format ('html'|'csv'|'ipac'); the default is html filepath (string): (optional) full path where the file will be saved; if not provided, a URL string to an HTML view of the table. Returns: string: URL to HTML table if filepath is not specified """ if self.debug: logging.debug('') logging.debug('Enter database.target_info: target= %s' % target) self.filepath = '' if (len(kwargs) > 0) and ('filepath' in kwargs.keys()): self.filepath = kwargs['filepath'] self.format = 'html' if (len(kwargs) > 0) and ('format' in kwargs.keys()): self.format = kwargs['format'] if len(self.filepath) > 0: self.disp = 0 else: self.disp = 1 self.sql = urllib.parse.quote("select * from FILES where target like '%" + target + "%';") url = self.url + '&format=' + self.format + '&sql=' + self.sql if self.debug: logging.debug('') logging.debug('sql= %s' % self.sql) logging.debug('filepath= %s' % self.filepath) logging.debug('format= %s' % self.format) logging.debug('disp= %d' % self.disp) logging.debug('url= [%s]' % url) if self.disp == 1: # webbrowser.open (url) return url self.__submit_request(url) if self.debug: logging.debug('') logging.debug('returned submit_request:') logging.debug('self.status= [%s]' % self.status) logging.debug('self.msg= [%s]' % self.msg) # # save to file is specified # if self.status == 'ok': self.__save_to_file(self.filepath) if self.debug: logging.debug('') logging.debug('returned save_to_file:') logging.debug('self.status= [%s]' % self.status) logging.debug('self.msg= [%s]' % self.msg) retval = dict() retval['status'] = self.status retval['msg'] = self.msg return retval
[docs] def sqlite(self, filepath): """ Downloads the sqlite database file Args: filepath (string): path and filename to save the database file on the disk. """ self.filepath = filepath self.cmd = 'sqlite' if self.debug: logging.debug('') logging.debug('Enter database.sqlite: filepath = %s' % self.filepath) if len(self.filepath) == 0: self.status = 'error' self.msg = 'Input argument filepath is required.' retval = dict() retval['status'] = self.status retval['msg'] = self.msg return retval url = self.url + '&cmd=' + self.cmd if self.debug: logging.debug('') logging.debug('sqlite= %s' % self.sqlite) logging.debug('self.url= [%s]' % self.url) self.__submit_request(url) if self.debug: logging.debug('') logging.debug('returned submit_request:') logging.debug('self.status= [%s]' % self.status) logging.debug('self.msg= [%s]' % self.msg) if self.status == 'ok': if self.debug: logging.debug('') self.__save_to_file(self.filepath) if self.debug: logging.debug('') logging.debug('returned save_to_file:') logging.debug('self.status= [%s]' % self.status) logging.debug('self.msg= [%s]' % self.msg) retval = dict() retval['status'] = self.status retval['msg'] = self.msg return retval
[docs] def rv_observations(self, target): """ Return metadata for all RV observations associated with a given target Args: target (string): target name """ url = self.search(sql="SELECT * from FILES WHERE TARGET like '{}' AND OBTYPE like 'RV observation'".format(target)) return url
[docs] @staticmethod def to_pandas(url): """ Convert URL for HTML table into a Pandas DataFrame Args: url (string): database search url (produced by Database methods like `search` or `target_info`) Returns: DataFrame """ assert "format=html" in url, "Must request table in HTML format" try: return pd.read_html(url, header=0)[0] except ImportError: # html5lib error raised if empty result return pd.DataFrame([])
def __submit_request(self, url): if self.debug: logging.debug('') logging.debug('Enter database.__submit_request:') logging.debug('sql= %s' % self.sql) logging.debug('sqlite= %s' % self.sqlite) logging.debug('format= %s' % self.format) logging.debug('url= [%s]' % url) try: self.response = requests.get(url, stream=True) if self.debug: logging.debug('') logging.debug('request sent') except Exception as e: if self.debug: logging.debug('') logging.debug('exception: e= %s' % str(e)) self.status = 'error' self.msg = 'Failed to submit the request: ' + str(e) return if self.debug: logging.debug('') logging.debug('status_code:') logging.debug(self.response.status_code) if self.response.status_code == 200: self.status = 'ok' self.msg = '' else: self.status = 'error' self.msg = 'Failed to submit the request' if self.debug: logging.debug('') logging.debug('headers: ') logging.debug(self.response.headers) self.content_type = self.response.headers['Content-type'] if self.content_type == 'json': if self.debug: logging.debug('') logging.debug('return is a json structure: error message') jsondata = json.loads(self.response.text) self.status = jsondata['status'] self.msg = jsondata['msg'] return def __save_to_file(self, filepath): if self.debug: logging.debug('') logging.debug('Enter database.__save_to_file:') logging.debug('filepath= %s' % filepath) try: with open(filepath, 'wb') as fd: for chunk in self.response.iter_content(chunk_size=512): fd.write(chunk) except NameError as e: if self.debug: logging.debug('') logging.debug('exception: e= %s' % e) self.status = 'error' self.msg = 'Failed to save returned data to file: %s' % filepath return self.status = 'ok' self.msg = '' return