From 4f75ec4f7222a9628d3b682c5a1a2147a293ec50 Mon Sep 17 00:00:00 2001 From: alejandraglz <alejandra.gonzalez@uni-jena.de> Date: Tue, 25 Oct 2022 15:50:00 +0200 Subject: [PATCH] Added notebook to add and push new CoRe simulations --- msc/coredb_add_push_new_sims.ipynb | 211 +++++++++++++++++++++++++++++ 1 file changed, 211 insertions(+) create mode 100644 msc/coredb_add_push_new_sims.ipynb diff --git a/msc/coredb_add_push_new_sims.ipynb b/msc/coredb_add_push_new_sims.ipynb new file mode 100644 index 0000000..ae7bc8b --- /dev/null +++ b/msc/coredb_add_push_new_sims.ipynb @@ -0,0 +1,211 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Add and push new simulations to the CoReDB - Notebook\n", + "\n", + "This notebook:\n", + "1. Adds simulations to the `dbkeys` and prepares its corresponding folder in the `CoRe_DB_clone` path\n", + "2. Adds their runs with metadata and h5 file.\n", + "3. Updates the CoRe DB index json file\n", + "4. Writes in each simulation folder a `.gitattributes` file to add the `data.h5` file to the LFS storage\n", + "5. Initializes and creates the git repositories \n", + "6. Pushes everything to [https://core-gitlfs.tpi.uni-jena.de/](https://core-gitlfs.tpi.uni-jena.de/)\n", + "\n", + "Note: The repositories are created as *private* projects. For now it's only possible to change this on the website under `Settings -> Visibility, project features, permissions` for each repo.\n", + "\n", + "Pre-requisites:\n", + "- Make sure you have enough access rights to push\n", + "- Add your ssh-key to `gitlab` [here](https://core-gitlfs.tpi.uni-jena.de/-/profile/keys). If it had a passphrase, avoid typing it every time by:\n", + " - Starting the **ssh-agent**: ``` eval `ssh-agent -s` ```\n", + " - Add key and enter passphrase: `ssh-add ~/.ssh/keyname` and you're set!\n", + "- This script assumes that the simulations already exist in a common directory (in this case in `tullio:/data/numrel/DATABASE/Release02/`) with their correct run folders with `data.h5` + `metadata.txt`\n", + "- In this particular case we had already the `metadata_main.txt` and `metadata.txt`'s, but should work anyways by defining your own metadata dictionary.\n", + "- Assumes the CoRe_DB_clone directory is in `tullio:/data/numrel/DATABASE/CoRe_DB_clone`\n", + "\n", + "[*Last Updated: 10/2022 AG*]" + ] + }, + { + "cell_type": "code", + "execution_count": 1, + "metadata": {}, + "outputs": [], + "source": [ + "import os\n", + "import numpy as np\n", + "from watpy.coredb.coredb import *\n", + "from watpy.coredb.metadata import *\n", + "from watpy.utils.ioutils import *\n", + "from watpy.utils.coreh5 import *" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "The function below does the git magic. Modify if you find a way to make the repo public from the beginning." + ] + }, + { + "cell_type": "code", + "execution_count": 2, + "metadata": {}, + "outputs": [], + "source": [ + "def git_push(dir,entry,verbose=True):\n", + " print(\"Working in \",dir)\n", + " out, err = runcmd(['git','init'],dir,True)\n", + " print(out,err)\n", + " out, err = runcmd(['git','add','.'],dir,True)\n", + " print(out,err)\n", + " commit = 'Initial commit'\n", + " out = runcmd(['git','commit','-m',commit], dir, True)\n", + " print(out,err) \n", + " url = 'git@core-gitlfs.tpi.uni-jena.de:core_database/'+entry+'.git'\n", + " out = runcmd(['git','push','--set-upstream',url,'master'],dir,True)\n", + " print(out,err)\n", + " print(\"done!\")" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "db_path = '/data/numrel/DATABASE/CoRe_DB_clone' \n", + "rel2_path = '/data/numrel/DATABASE/Release02/' # Change accordingly!\n", + "cdb = CoRe_db(db_path)\n", + "idb = cdb.idb\n", + "\n", + "direcs = os.listdir(rel2_path) # Array with all simulation folders\n", + "# Delete the folders not corresponding to simulations:\n", + "direcs.remove('Sim')\n", + "direcs.remove('README')\n", + "direcs.sort() # Sort if necessary" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "### ADD SIM DIRECTORY AND TO DBKEYS\n", + "for entry in direcs:\n", + " dbkey = entry.replace('_',':')\n", + " code = entry.split('_')[0]\n", + " this_path = rel2_path + entry\n", + " \n", + " sim = CoRe_sim(this_path)\n", + " \n", + " # Prepare metadata (change for a dictionary if necessary)\n", + " metamain = this_path + '/metadata_main.txt'\n", + " meta_main = sim.md.data\n", + "\n", + " # Add dbkey\n", + " newdbkey = cdb.add_simulation(code,meta_main['simulation_name'], metadata = meta_main)\n", + "\n", + " # Same for the runs\n", + " run_list = list(sim.run.keys())\n", + " run_list.sort()\n", + "\n", + " for res in run_list:\n", + " print('In :', res)\n", + " run_path = this_path+'/'+res\n", + " simrun = sim.run[res]\n", + " meta_dict = simrun.md.data # metadata.txt should exist!\n", + " cdb.sim[newdbkey].add_run(path = run_path , metadata = meta_dict)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "### UPDATE CORE INDEX\n", + "# read metadata_main in a list\n", + "mdlist = []\n", + "print(cdb.idb.dbkeys)\n", + "for key in idb.dbkeys:\n", + " mdlist.append(cdb.sim[key].md)\n", + "\n", + "# update the index \n", + "idb.update_from_mdlist(mdlist)\n", + "\n", + "# write the index to JSON with the appropriate template \n", + "idb.to_json_tmplk()" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Now prepare the git repositories:" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "for entry in direcs[3:]:\n", + " print('In: ', entry)\n", + " git_path = db_path + '/' + entry\n", + " sim = CoRe_sim(git_path)\n", + " run_list = list(sim.run.keys())\n", + " run_list.sort()\n", + " \n", + " ## ADD .GITATTRIBUTES FILE\n", + " file_path = git_path + '/.gitattributes'\n", + " if os.path.exists(file_path):\n", + " print('File exists!')\n", + " else:\n", + " git_file = open(file_path, \"w\")\n", + " L = []\n", + " for res in run_list:\n", + " git_string = res+'/data.h5 filter=lfs diff=lfs merge=lfs -text \\n'\n", + " L.append(git_string)\n", + " git_file.writelines(L)\n", + " git_file.close()\n", + " print('Wrote file!')\n", + "\n", + " ## CREATE REPO AND PUSH\n", + " print('Initialize git repo ..')\n", + " git_push(git_path,entry)" + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3.9.7 ('base')", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.9.7" + }, + "orig_nbformat": 4, + "vscode": { + "interpreter": { + "hash": "c1e17a00b0aafd17a1d703df555a95772c618591fbce2c667d77b3e0cadbd66d" + } + } + }, + "nbformat": 4, + "nbformat_minor": 2 +} -- GitLab