From 4f75ec4f7222a9628d3b682c5a1a2147a293ec50 Mon Sep 17 00:00:00 2001
From: alejandraglz <alejandra.gonzalez@uni-jena.de>
Date: Tue, 25 Oct 2022 15:50:00 +0200
Subject: [PATCH] Added notebook to add and push new CoRe simulations

---
 msc/coredb_add_push_new_sims.ipynb | 211 +++++++++++++++++++++++++++++
 1 file changed, 211 insertions(+)
 create mode 100644 msc/coredb_add_push_new_sims.ipynb

diff --git a/msc/coredb_add_push_new_sims.ipynb b/msc/coredb_add_push_new_sims.ipynb
new file mode 100644
index 0000000..ae7bc8b
--- /dev/null
+++ b/msc/coredb_add_push_new_sims.ipynb
@@ -0,0 +1,211 @@
+{
+ "cells": [
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "## Add and push new simulations to the CoReDB - Notebook\n",
+    "\n",
+    "This notebook:\n",
+    "1. Adds simulations to the `dbkeys` and prepares their corresponding folders in the `CoRe_DB_clone` path\n",
+    "2. Adds their runs with metadata and h5 file.\n",
+    "3. Updates the CoRe DB index json file\n",
+    "4. Writes in each simulation folder a `.gitattributes` file to add the `data.h5` file to the LFS storage\n",
+    "5. Initializes and creates the git repositories \n",
+    "6. Pushes everything to [https://core-gitlfs.tpi.uni-jena.de/](https://core-gitlfs.tpi.uni-jena.de/)\n",
+    "\n",
+    "Note: The repositories are created as *private* projects. For now it's only possible to change this on the website under `Settings -> Visibility, project features, permissions` for each repo.\n",
+    "\n",
+    "Pre-requisites:\n",
+    "- Make sure you have enough access rights to push\n",
+    "- Add your ssh-key to `gitlab` [here](https://core-gitlfs.tpi.uni-jena.de/-/profile/keys). If it has a passphrase, avoid typing it every time by:\n",
+    "  - Starting the **ssh-agent**: ``` eval `ssh-agent -s` ```\n",
+    "  - Adding the key and entering the passphrase once: `ssh-add ~/.ssh/keyname` and you're set!\n",
+    "- This script assumes that the simulations already exist in a common directory (in this case in `tullio:/data/numrel/DATABASE/Release02/`) with their correct run folders with `data.h5` + `metadata.txt`\n",
+    "- In this particular case we already had the `metadata_main.txt` and `metadata.txt` files, but it should work anyway if you define your own metadata dictionary.\n",
+    "- Assumes the CoRe_DB_clone directory is in `tullio:/data/numrel/DATABASE/CoRe_DB_clone`\n",
+    "\n",
+    "[*Last Updated: 10/2022 AG*]"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 1,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "import os\n",
+    "import numpy as np\n",
+    "from watpy.coredb.coredb import *\n",
+    "from watpy.coredb.metadata import *\n",
+    "from watpy.utils.ioutils import *\n",
+    "from watpy.utils.coreh5 import *"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "The function below does the git magic. Modify if you find a way to make the repo public from the beginning."
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 2,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "def git_push(dir,entry,verbose=True):\n",
+    "    \"\"\"Init a git repo in `dir`, commit all files and push to the GitLab project `entry` (`verbose` is unused).\"\"\"\n",
+    "    url = 'git@core-gitlfs.tpi.uni-jena.de:core_database/'+entry+'.git'\n",
+    "    git_steps = (['git','init'],\n",
+    "                 ['git','add','.'],\n",
+    "                 ['git','commit','-m','Initial commit'],\n",
+    "                 ['git','push','--set-upstream',url,'master'])\n",
+    "    print(\"Working in \",dir)\n",
+    "    for cmd in git_steps:\n",
+    "        # Unpack (out, err) for every step so the printed err is never a stale value from an earlier command\n",
+    "        out, err = runcmd(cmd,dir,True)\n",
+    "        print(out,err)\n",
+    "    print(\"done!\")"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# Local clone of the CoRe DB and the folder holding the simulations to be added\n",
+    "db_path = '/data/numrel/DATABASE/CoRe_DB_clone'\n",
+    "rel2_path   = '/data/numrel/DATABASE/Release02/' # Change accordingly!\n",
+    "cdb = CoRe_db(db_path)\n",
+    "idb = cdb.idb\n",
+    "\n",
+    "# Entries of rel2_path that do not correspond to simulations\n",
+    "not_sims = ('Sim', 'README')\n",
+    "# Sorted simulation folders; filtering (unlike list.remove) does not raise if one of these entries is absent\n",
+    "direcs = sorted(d for d in os.listdir(rel2_path) if d not in not_sims)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "### ADD SIM DIRECTORY AND TO DBKEYS\n",
+    "for entry in direcs:\n",
+    "    # The text before the first '_' of the folder name is the code passed to add_simulation\n",
+    "    code = entry.split('_')[0]\n",
+    "    # os.path.join gives the right path whether or not rel2_path ends with a slash\n",
+    "    this_path = os.path.join(rel2_path, entry)\n",
+    "    \n",
+    "    sim = CoRe_sim(this_path)\n",
+    "    \n",
+    "    # Simulation metadata (presumably read from metadata_main.txt - confirm; change for a dictionary if necessary)\n",
+    "    meta_main = sim.md.data\n",
+    "\n",
+    "    # Add dbkey\n",
+    "    newdbkey = cdb.add_simulation(code,meta_main['simulation_name'], metadata = meta_main)\n",
+    "\n",
+    "    # Same for the runs, in sorted order\n",
+    "    run_list = sorted(sim.run.keys())\n",
+    "\n",
+    "    for res in run_list:\n",
+    "        print('In :', res)\n",
+    "        run_path = os.path.join(this_path, res)\n",
+    "        simrun = sim.run[res]\n",
+    "        meta_dict = simrun.md.data # metadata.txt should exist!\n",
+    "        # Register the run folder together with its metadata under the new dbkey\n",
+    "        cdb.sim[newdbkey].add_run(path = run_path , metadata = meta_dict)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "### UPDATE CORE INDEX\n",
+    "# Show the database keys currently known to the index\n",
+    "print(cdb.idb.dbkeys)\n",
+    "\n",
+    "# Gather the metadata object of every simulation listed in the index\n",
+    "mdlist = [cdb.sim[key].md for key in idb.dbkeys]\n",
+    "\n",
+    "# Refresh the index from the collected metadata\n",
+    "idb.update_from_mdlist(mdlist)\n",
+    "\n",
+    "# Write the index to JSON with the appropriate template\n",
+    "idb.to_json_tmplk()"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "Now prepare the git repositories:"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# NOTE(review): direcs[3:] skips the first three folders (presumably pushed earlier); use direcs to process all\n",
+    "for entry in direcs[3:]:\n",
+    "    print('In: ', entry)\n",
+    "    git_path = os.path.join(db_path, entry)\n",
+    "    # CoRe_sim on the folder inside the DB clone provides the run names through sim.run\n",
+    "    sim = CoRe_sim(git_path)\n",
+    "    run_list = sorted(sim.run.keys())\n",
+    "    \n",
+    "    ## ADD .GITATTRIBUTES FILE\n",
+    "    file_path = os.path.join(git_path, '.gitattributes')\n",
+    "    # An existing .gitattributes is left untouched\n",
+    "    if os.path.exists(file_path):\n",
+    "        print('File exists!')\n",
+    "    else:\n",
+    "        # One rule per run so that each <run>/data.h5 goes to the LFS storage\n",
+    "        lfs_rules = [res+'/data.h5 filter=lfs diff=lfs merge=lfs -text \\n' for res in run_list]\n",
+    "        # The with-block closes the file even if writing fails\n",
+    "        with open(file_path, \"w\") as git_file:\n",
+    "            git_file.writelines(lfs_rules)\n",
+    "        print('Wrote file!')\n",
+    "\n",
+    "    ## CREATE REPO AND PUSH\n",
+    "    print('Initialize git repo ..')\n",
+    "    git_push(git_path,entry)"
+   ]
+  }
+ ],
+ "metadata": {
+  "kernelspec": {
+   "display_name": "Python 3.9.7 ('base')",
+   "language": "python",
+   "name": "python3"
+  },
+  "language_info": {
+   "codemirror_mode": {
+    "name": "ipython",
+    "version": 3
+   },
+   "file_extension": ".py",
+   "mimetype": "text/x-python",
+   "name": "python",
+   "nbconvert_exporter": "python",
+   "pygments_lexer": "ipython3",
+   "version": "3.9.7"
+  },
+  "orig_nbformat": 4,
+  "vscode": {
+   "interpreter": {
+    "hash": "c1e17a00b0aafd17a1d703df555a95772c618591fbce2c667d77b3e0cadbd66d"
+   }
+  }
+ },
+ "nbformat": 4,
+ "nbformat_minor": 2
+}
-- 
GitLab