Source code for singlet.io.googleapi.samplesheet

# vim: fdm=indent
# author:     Fabio Zanini
# date:       16/01/17
# content:    Google Sheets API for the sample sheet.
# Modules
import os
import numpy as np
import pandas as pd

from .googleapi import GoogleIOError, GoogleAPI


# Globals


# Classes / functions
class SampleSheet(GoogleAPI):
    '''Google Sheets access to the sample sheet.

    Thin layer on top of GoogleAPI that remembers a default sheet (tab)
    name and offers helpers to download tables from the spreadsheet.
    '''

    def __init__(self, sheet):
        '''Set up the connection from a configuration dictionary.

        Args:
            sheet (dict): Must contain the keys 'sheet' (default sheet
                name), 'google_id', 'client_id_filename' and
                'client_secret_filename'. The values are forwarded to
                GoogleAPI.__init__ in the order (google_id, sheet name,
                client id filename, client secret filename).

        Raises:
            KeyError: If one of the required keys is missing.
        '''
        self.sheetname = sheet['sheet']
        sheetid = sheet['google_id']
        client_id_filename = sheet['client_id_filename']
        client_secret_filename = sheet['client_secret_filename']
        super().__init__(
            sheetid,
            self.sheetname,
            client_id_filename,
            client_secret_filename)

    def _get_range_values(self, range_name):
        '''Return the raw rows (list of lists) for a range in A1 notation.'''
        # NOTE(review): self.service and self.spreadsheetId are not set in
        # this class; presumably GoogleAPI provides them - confirm.
        result = self.service.spreadsheets().values().get(
            spreadsheetId=self.spreadsheetId,
            range=range_name).execute()
        return result.get('values', [])

    def get_number_virus_reads(self, virus, icols=None):
        '''Get the number of virus reads from the spreadsheet.

        The data is read from the sheet called 'sequenced'. The column
        holding the counts is named 'number<Virus>Reads', where <Virus> is
        virus.capitalize().

        Args:
            virus (str): Name of the virus, used to build the column name.
            icols (dict or None): Maps column names to the column
                identifier used in A1 notation (it is concatenated into
                the range string, so it must be a str). Must contain
                'name' and the virus read column. If None, it is looked
                up via get_header_columns_indices for the columns 'name',
                'experiment' and the virus read column.

        Returns:
            pandas.DataFrame: One column per key of icols, 'name' first.
            The virus read column is integer, with -1 marking empty
            cells; all other columns hold the cell contents, with ''
            for empty cells.

        Raises:
            ValueError: If 'name' or the virus read column is missing
                from icols, or if the columns end up with different
                lengths.
        '''
        sheetname = 'sequenced'
        vircolname = 'number'+virus.capitalize()+'Reads'

        if icols is None:
            icols = self.get_header_columns_indices(
                ['name', 'experiment', vircolname],
                sheetname)

        if 'name' not in icols:
            raise ValueError('name must be part of the icols dict')
        if vircolname not in icols:
            raise ValueError(vircolname+' must be part of the icols dict')

        # Ask for the name first, that determines the range
        colnames = ['name'] + [cn for cn in icols if cn != 'name']

        # Get the values
        data = {}
        n_names = 0
        for colname in colnames:
            icol = icols[colname]

            if colname == 'name':
                # Open-ended request: Google figures out the max row number
                last_row = 100000
            else:
                last_row = 1 + n_names
            range_name = sheetname+'!'+icol+'2:'+icol+str(last_row)

            if (colname != 'name') and (n_names == 0):
                # No samples: the range would be reversed (row 2 to row 1),
                # so skip the request and keep the column empty.
                values = []
            else:
                values = self._get_range_values(range_name)

            # Google cuts trailing empty cells, pad back to the name length
            if (colname != 'name') and (len(values) < n_names):
                values = values + [['']] * (n_names - len(values))

            # Format. Empty cells inside the range come back as empty rows,
            # so never index the first cell without checking.
            if colname == vircolname:
                values = np.array(
                    [int(v[0]) if (len(v) and (v[0] != '')) else -1
                     for v in values],
                    int)
            else:
                values = np.array([v[0] if len(v) else '' for v in values])

            if colname == 'name':
                n_names = len(values)

            data[colname] = values

        # Check consistency
        for datum in data.values():
            if len(datum) != n_names:
                raise ValueError('Not all columns have the same length')

        return pd.DataFrame(data)

    def get_table(self, fmt='pandas', sheetname=None):
        '''Download a whole sheet.

        Args:
            fmt (str): Output format. 'pandas' returns a DataFrame using
                the first row as column names, 'numpy' returns
                np.array of all rows, 'raw' returns the rows as obtained
                from get_data.
            sheetname (str or None): Sheet to download. None (default)
                uses the sheet name given at construction.

        Returns:
            pandas.DataFrame, numpy.ndarray or the raw rows, depending on
            fmt. An empty sheet yields an empty DataFrame for 'pandas'.

        Raises:
            ValueError: If fmt is not one of the formats above.
        '''
        # Fail before contacting Google if the format is wrong
        if fmt not in ('pandas', 'numpy', 'raw'):
            raise ValueError('Format not understood')

        if sheetname is None:
            sheetname = self.sheetname

        values = super().get_data(sheetname)

        if fmt == 'raw':
            return values
        if fmt == 'numpy':
            return np.array(values)

        # fmt == 'pandas': without a header row there is nothing to build
        if not values:
            return pd.DataFrame()
        return pd.DataFrame(values[1:], columns=values[0])

    def update_tsv_table(self, sheetname, sandbox=True):
        '''Update TSV table from the Google Sheet.

        Downloads the sheet in raw format and writes it as tab-separated
        text, one sheet row per line, to the file returned by
        get_sample_table_filename(kind=sheetname, sandbox=sandbox).

        Args:
            sheetname (str): Sheet to download; also passed as 'kind' when
                looking up the output filename.
            sandbox (bool): Forwarded to get_sample_table_filename.
        '''
        # Imported here as in the original code
        # NOTE(review): presumably to avoid a circular import - confirm.
        from ..filenames import get_sample_table_filename

        fn = get_sample_table_filename(kind=sheetname, sandbox=sandbox)
        table = self.get_table(sheetname=sheetname, fmt='raw')

        table_tsv = '\n'.join(map('\t'.join, table))+'\n'
        with open(fn, 'w') as f:
            f.write(table_tsv)