From 24ee96aed4606fb4e5077dec5fcc5ae1b1c395b7 Mon Sep 17 00:00:00 2001 From: Antonio Vara Date: Thu, 1 Sep 2022 14:15:40 +0200 Subject: [PATCH 1/2] new db --- .gitignore | 3 + bin/upload_strains_to_mirri_is_NEWDB.py | 223 ++++++++++++++++++++++++ bin/validateNEW.py | 21 +++ 3 files changed, 247 insertions(+) create mode 100644 .gitignore create mode 100644 bin/upload_strains_to_mirri_is_NEWDB.py create mode 100644 bin/validateNEW.py diff --git a/.gitignore b/.gitignore new file mode 100644 index 0000000..2a0bb6c --- /dev/null +++ b/.gitignore @@ -0,0 +1,3 @@ +mirri/biolomics/secrets.py +.vscode/launch.json +*.pyc diff --git a/bin/upload_strains_to_mirri_is_NEWDB.py b/bin/upload_strains_to_mirri_is_NEWDB.py new file mode 100644 index 0000000..b4c486e --- /dev/null +++ b/bin/upload_strains_to_mirri_is_NEWDB.py @@ -0,0 +1,223 @@ +#!/usr/bin/env python3 +import argparse +from cmath import nan +import sys +from collections import Counter + +# +from mirri.biolomics.pipelines.growth_medium import get_or_create_or_update_growth_medium +from mirri.biolomics.pipelines.strain import get_or_create_or_update_strain +from mirri.biolomics.remote.biolomics_client import BiolomicsMirriClient +from mirri.io.parsers.mirri_excel import parse_mirri_excel +from mirri.validation.excel_validator import validate_mirri_excel + +##Database +from sqlalchemy import create_engine, MetaData +import pymysql +import pandas as pd +from pathlib import Path +import numpy as np + +# DEFINE THE DATABASE CREDENTIALS +user = 'mirridev' +password = 'estramboticandolotodo' +host = 'mirri-is.mirri.org' +port = 33066 +database = 'mirri-db' + +TEST_SERVER_URL = 'https://webservices.bio-aware.com/mirri_test' +PROD_SERVER_URL = 'https://webservices.bio-aware.com/mirri' + +def show_stats(counter, kind, out_fhand): + out_fhand.write(f'{kind}\n') + line = ''.join(['-'] * len(kind)) + out_fhand.write(f"{line}\n") + for kind2, value in counter.most_common(5): + out_fhand.write(f'{kind2}: {value}\n') + out_fhand.write('\n') + + +def get_cmd_args(): + desc = "Upload strains to MIRRI-IS" + parser = argparse.ArgumentParser(description=desc) + parser.add_argument('-i' , '--input', required=True, help='Validated Excel file', type=argparse.FileType('rb')) + parser.add_argument('-v' , '--spec_version', default='20200601', help='Version of he specification of the given excel file') + parser.add_argument('-u' , '--ws_user', help='Username of the web service') + parser.add_argument('-p' , '--ws_password', required=True, help='Password of the web service user') + parser.add_argument('-c' , '--client_id', required=True, help='Client id of the web service') + parser.add_argument('-s' , '--client_secret', required=True, help='Client secret of the web service') + parser.add_argument('--force_update' , required=False, action='store_true', help='Use it if you want to update the existing strains') + parser.add_argument('--verbose' , action='store_true', help='use it if you want a verbose output') + parser.add_argument('--prod' , action='store_true', help='Use production server') + parser.add_argument('--dont_add_gm' , default=True, action='store_false', help="Don't add growth media") + parser.add_argument('--dont_add_strains', default=True, action='store_false', help="Don't add growth media") + parser.add_argument('--skip_first_num' , type=int, help='skip first X strains to the tool') + + args = parser.parse_args() + + return {'input_fhand': args.input + ,'user': args.ws_user + ,'version': args.spec_version + ,'password': args.ws_password + ,'client_id': args.client_id + ,'client_secret': args.client_secret + ,'update': args.force_update + ,'verbose': args.verbose + ,'use_production_server': args.prod + ,'add_gm': args.dont_add_gm + ,'add_strains': args.dont_add_strains + ,'skip_first_num': args.skip_first_num + } + + +def write_errors_in_screen(errors, fhand=sys.stderr): + for key, errors_by_type in errors.items(): + fhand.write(f'{key}\n') + fhand.write('-' * len(key) + '\n') + for error in errors_by_type: + if error.pk: + fhand.write(f'{error.pk}: ') + fhand.write(f'{error.message} - {error.code}\n') + fhand.write('\n') + + +def create_or_upload_strains(client, strains, update=False, counter=None, out_fhand=None, seek=None): + + for index, strain in enumerate(strains): + if seek is not None and index < seek: + continue + # if strain.id.strain_id != 'CECT 5766': + # continue + result = get_or_create_or_update_strain(client, strain, update=update) + + new_strain = result['record'] + created = result['created'] + updated = result.get('updated', False) + if updated: + result_state = 'updated' + elif created: + result_state = 'created' + else: + result_state = 'not modified' + + if counter is not None: + counter[result_state] += 1 + if out_fhand is not None: + out_fhand.write(f'{index}: Strain {new_strain.id.strain_id}: {result_state}\n') + # break + + +def create_or_upload_growth_media(client, growth_media, update=False, counter=None, out_fhand=None): + + for gm in growth_media: + result = get_or_create_or_update_growth_medium(client, gm, update) + + new_gm = result['record'] + created = result['created'] + updated = result.get('updated', False) + if updated: + result_state = 'updated' + elif created: + result_state = 'created' + else: + result_state = 'not modified' + if counter is not None: + counter[result_state] += 1 + if out_fhand is not None: + out_fhand.write(f'Growth medium {new_gm.record_name}: {result_state}\n') + + +def get_connection(): + # PYTHON FUNCTION TO CONNECT TO THE MYSQL DATABASE AND + # RETURN THE SQLACHEMY ENGINE OBJECT + return create_engine(url="mysql+pymysql://{0}:{1}@{2}:{3}/{4}".format(user, password, host, port, database)) + +def main(): + ## Load Excel + path = Path('C://data//brclims_excel.xlsx') + Excel_Data = pd.read_excel(path, sheet_name = None) + cc_id=1 + + ## Load Database + sqlEngine = get_connection() + engine = sqlEngine.connect() + + ## Create new file upload + with engine.connect() as conn: + metaDats = MetaData(conn, schema=database) + metaDats.reflect(bind=conn) + table = metaDats.tables['mirri-db.file_upload'] + stmt = table.insert().values(filename=path.name,cc_id=cc_id) + aux=conn.execute(stmt).inserted_primary_key[0] + + ## Load all Sheet from Excel to DB + for key in Excel_Data.keys(): + #print(key) + n=Excel_Data[key].replace(np.nan, '', regex=True).astype(str) + n.columns = n.columns.str.replace(' ','_') + n['f_id']=aux + n.to_sql(key, engine, index=False, if_exists='append') + + + +""" + args = get_cmd_args() + input_fhand = args['input_fhand'] + spec_version = args['version'] + out_fhand = sys.stdout + error_log = validate_mirri_excel(input_fhand, version=spec_version) + errors = error_log.get_errors() + skip_first_num = args['skip_first_num'] + if errors: + write_errors_in_screen(errors, out_fhand) + sys.exit(1) + + input_fhand.seek(0) + parsed_objects = parse_mirri_excel(input_fhand, version=spec_version) + strains = list(parsed_objects['strains']) + growth_media = list(parsed_objects['growth_media']) + + server_url = PROD_SERVER_URL if args['use_production_server'] else TEST_SERVER_URL + + client = BiolomicsMirriClient(server_url=server_url, api_version='v2', + client_id=args['client_id'], + client_secret=args['client_secret'], + username=args['user'], + password=args['password'], + verbose=args['verbose']) + + if args['add_gm']: + client.start_transaction() + counter = Counter() + try: + create_or_upload_growth_media(client, growth_media, update=args['update'], + counter=counter, out_fhand=out_fhand) + except (Exception, KeyboardInterrupt) as error: + out_fhand.write('There were some errors in the Growth media upload\n') + out_fhand.write(str(error) + '\n') + out_fhand.write('Rolling back\n') + client.rollback() + raise + client.finish_transaction() + show_stats(counter, 'Growth Media', out_fhand) + + if args['add_strains']: + client.start_transaction() + counter = Counter() + try: + create_or_upload_strains(client, strains, update=args['update'], + counter=counter, + out_fhand=out_fhand, seek=skip_first_num) + client.finish_transaction() + except (Exception, KeyboardInterrupt) as error: + out_fhand.write('There were some errors in the Strain upload\n') + out_fhand.write(str(error) + '\n') + out_fhand.write('rolling back\n') + # client.rollback() + raise + client.finish_transaction() + show_stats(counter, 'Strains', out_fhand) +""" + +if __name__ == '__main__': + main() diff --git a/bin/validateNEW.py b/bin/validateNEW.py new file mode 100644 index 0000000..923bc5c --- /dev/null +++ b/bin/validateNEW.py @@ -0,0 +1,21 @@ +#!/usr/bin/env python +import sys +from pathlib import Path +from mirri.validation.excel_validator import validate_mirri_excel +import warnings +warnings.simplefilter("ignore") + + +def main(): + # path = Path(sys.argv[1]) + path = Path( 'C:/data/brclims_excel.xlsx') + + error_log = validate_mirri_excel(path.open("rb")) + + for errors in error_log.get_errors().values(): + for error in errors: + print(error.pk, error.message, error.code) + + +if __name__ == "__main__": + main() -- 2.47.2 From 2388d5f76af41d2c22ef92216b30b82cd55e6caf Mon Sep 17 00:00:00 2001 From: Antonio Vara Date: Fri, 2 Sep 2022 12:22:43 +0200 Subject: [PATCH 2/2] newdb --- bin/upload_strains_to_mirri_is_NEWDB.py | 1 + 1 file changed, 1 insertion(+) diff --git a/bin/upload_strains_to_mirri_is_NEWDB.py b/bin/upload_strains_to_mirri_is_NEWDB.py index b4c486e..a9a8e87 100644 --- a/bin/upload_strains_to_mirri_is_NEWDB.py +++ b/bin/upload_strains_to_mirri_is_NEWDB.py @@ -158,6 +158,7 @@ def main(): n['f_id']=aux n.to_sql(key, engine, index=False, if_exists='append') + """ -- 2.47.2