{
 "cells": [
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# Getting started\n",
    "\n",
    "Example codes illustrate how to use [Pantea](https://pantea.readthedocs.io/)."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 1,
   "metadata": {
    "tags": []
   },
   "outputs": [],
   "source": [
    "# !gpustat"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Imports"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 2,
   "metadata": {
    "tags": []
   },
   "outputs": [],
   "source": [
    "# import os\n",
    "# os.environ[\"JAX_PLATFORM_NAME\"] = \"cpu\"  # disable GPU"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 3,
   "metadata": {},
   "outputs": [],
   "source": [
    "# import logging\n",
    "# from pantea.logger import set_logging_level\n",
    "# set_logging_level(logging.INFO)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {
    "tags": []
   },
   "source": [
    "## Dataset"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "### RuNNer\n",
    "Read input dataset in [RuNNer](https://www.uni-goettingen.de/de/560580.html) format."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 4,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Total number of structures: 20\n"
     ]
    },
    {
     "data": {
      "text/plain": [
       "Dataset(datasource=RunnerDataSource(filename='input.data', dtype=float64), persist=False)"
      ]
     },
     "execution_count": 4,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "from pantea.datasets import Dataset\n",
    "structures = Dataset.from_runner(\"input.data\", persist=False)\n",
    "print(\"Total number of structures:\", len(structures))\n",
    "# structures.preload()\n",
    "structures"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "#### Split train and validation structures"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 5,
   "metadata": {},
   "outputs": [],
   "source": [
    "# import torch\n",
    "# validation_split = 0.032\n",
    "# nsamples = len(structures)\n",
    "# split = int(np.floor(validation_split * nsamples))\n",
    "# train_structures, valid_structures = torch.utils.data.random_split(structures, lengths=[nsamples-split, split])\n",
    "# structures = valid_structures"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Structure"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 33,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "Structure(natoms=12, elements=('H', 'O'), dtype=float64)"
      ]
     },
     "execution_count": 33,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "structure = structures[0]\n",
    "structure"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 34,
   "metadata": {},
   "outputs": [],
   "source": [
    "from ase.visualize import view\n",
    "atoms = structure.to_ase()\n",
    "# view(atoms, viewer='ngl') # ase, ngl"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 8,
   "metadata": {},
   "outputs": [],
   "source": [
    "from ase.io.vasp import write_vasp\n",
    "write_vasp('POSCAR', atoms)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "### Compare between structures"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 9,
   "metadata": {},
   "outputs": [],
   "source": [
    "# from pantea.utils.compare import compare\n",
    "# compare(structures[0], structures[1])"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "### Calculate distance between atoms"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 10,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "Array([0.        , 2.24720275, 3.85385356, 4.84207409, 7.55265933],      dtype=float64)"
      ]
     },
     "execution_count": 10,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "from pantea.atoms import calculate_distances\n",
    "distances = calculate_distances(structure)\n",
    "distances[0, :5]"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 11,
   "metadata": {},
   "outputs": [],
   "source": [
    "# sns.displot(dis.flatten(), bins=20)\n",
    "# plt.axvline(dis.mean(), color='r');"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "### Find neighboring atom"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 12,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Neighbor(r_cutoff=10.0)\n",
      "Number of neighbors for atom index 0: 11\n"
     ]
    }
   ],
   "source": [
    "from pantea.atoms import Neighbor\n",
    "neighbor = Neighbor.from_structure(structure, r_cutoff=10.0)\n",
    "print(neighbor)\n",
    "print(\"Number of neighbors for atom index 0:\", sum(neighbor.masks[0]))"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {
    "tags": []
   },
   "source": [
    "### Per-atom energy offset"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 13,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "Array(-12.93900273, dtype=float64)"
      ]
     },
     "execution_count": 13,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "structure = structures[0]\n",
    "atom_energy = {'O': 2.4, 'H': 1.2}\n",
    "\n",
    "structure.add_energy_offset(atom_energy)\n",
    "structure.total_energy"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Descriptor\n",
    "\n",
    "Atomic environment descriptor."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 14,
   "metadata": {},
   "outputs": [],
   "source": [
    "from pantea.descriptors import ACSF\n",
    "from pantea.descriptors.acsf import G2, G3, G9, CutoffFunction, NeighborElements"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 15,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "AtomCenteredSymmetryFunction(central_element='O', num_symmetry_functions=4)"
      ]
     },
     "execution_count": 15,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# Define cutoff, radial, and angular symmetry functions\n",
    "cfn = CutoffFunction.from_type(\"tanhu\", r_cutoff=12.0)\n",
    "g2_1 = G2(cfn, 0.0, 0.001)\n",
    "g2_2 = G2(cfn, 0.0, 0.01)\n",
    "g3_1 = G3(cfn, 0.2, 1.0, 1.0, 0.0)\n",
    "g9_1 = G3(cfn, 0.2, 1.0, 1.0, 0.0)\n",
    "# Create an ACSF descriptor for Oxygen atoms with multiple symmetry functions\n",
    "acsf = ACSF(\n",
    "    central_element='O',\n",
    "    radial_symmetry_functions=(\n",
    "        (g2_1, NeighborElements('H')),\n",
    "        (g2_2, NeighborElements('H')),\n",
    "    ),\n",
    "    angular_symmetry_functions=(\n",
    "        (g3_1, NeighborElements('H', 'H')),\n",
    "        (g9_1, NeighborElements('H', 'O')),\n",
    "    ),\n",
    ")\n",
    "acsf"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "### Computing descriptor values"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 16,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "Array([[1.14844196e+00, 9.88176531e-01, 1.47813356e-04, 4.32482880e-04],\n",
       "       [1.08272759e+00, 9.35614402e-01, 2.52484053e-04, 7.64628911e-06],\n",
       "       [1.21152837e+00, 1.01618940e+00, 3.80667373e-05, 3.74757210e-04],\n",
       "       [1.01457901e+00, 8.42276766e-01, 4.21045778e-05, 1.53015035e-08]],      dtype=float64)"
      ]
     },
     "execution_count": 16,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "descriptor_values = acsf(structure) # only for O atoms\n",
    "descriptor_values"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "### Gradient"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 17,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "Array([[[-0.02516027,  0.00975464,  0.08007459],\n",
       "        [-0.01986903, -0.01250941,  0.06779186],\n",
       "        [ 0.00015445, -0.00028165, -0.00015613],\n",
       "        [-0.00093546,  0.00050747,  0.00044732]]], dtype=float64)"
      ]
     },
     "execution_count": 17,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "gradients = acsf.grad(structure) # gradient respect to the atom positions\n",
    "gradients[:1, ...]"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Scaler\n",
    "\n",
    "Descriptor scaler."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 18,
   "metadata": {},
   "outputs": [],
   "source": [
    "from pantea.descriptors import DescriptorScaler, ScalerParams\n",
    "from tqdm import tqdm"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "### Fitting scaling parameters"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 19,
   "metadata": {},
   "outputs": [
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "100%|██████████| 20/20 [00:00<00:00, 44.04it/s]\n"
     ]
    },
    {
     "data": {
      "text/plain": [
       "DescriptorScaler(transform='_scale_center', scale_range=(0.0, 1.0))"
      ]
     },
     "execution_count": 19,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "scaler = DescriptorScaler.from_type('scale_center')\n",
    "\n",
    "for index, structure in enumerate(tqdm(structures)):  \n",
    "    descriptor_values = acsf(structure)\n",
    "    if index == 0:\n",
    "        scaler_params = scaler.fit(descriptor_values) \n",
    "    else:\n",
    "        scaler_params = scaler.partial_fit(scaler_params, descriptor_values)\n",
    "    \n",
    "scaler"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 20,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "Array([[ 0.11808065,  0.16078358, -0.11087884,  0.32829958],\n",
       "       [ 0.09382786,  0.1297158 ,  0.2480281 , -0.33211773],\n",
       "       [ 0.27612704,  0.18111383, -0.24553955,  0.29089544],\n",
       "       [-0.04685936, -0.06034315, -0.1896889 , -0.33306557]],      dtype=float64)"
      ]
     },
     "execution_count": 20,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "scaled_descriptor_values = scaler(scaler_params, descriptor_values)\n",
    "scaled_descriptor_values"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Model"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 21,
   "metadata": {},
   "outputs": [],
   "source": [
    "from pantea.models import NeuralNetworkModel\n",
    "from pantea.models.nn import UniformInitializer\n",
    "from flax import linen as nn\n",
    "import jax\n",
    "import jax.numpy as jnp"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 22,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "NeuralNetworkModel(hidden_layers=((8, 'tanh'), (8, 'tanh')), dtype=float64)"
      ]
     },
     "execution_count": 22,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "model = NeuralNetworkModel(\n",
    "    hidden_layers=(\n",
    "        (8, 'tanh'), \n",
    "        (8, 'tanh'),\n",
    "    ),\n",
    ")\n",
    "model"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 23,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "{'params': {'layers_0': {'bias': (8,), 'kernel': (4, 8)},\n",
       "  'layers_2': {'bias': (8,), 'kernel': (8, 8)},\n",
       "  'layers_4': {'bias': (1,), 'kernel': (8, 1)}}}"
      ]
     },
     "execution_count": 23,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "rng = jax.random.PRNGKey(2022)                               # PRNG Key\n",
    "inputs = jnp.ones(shape=(8, acsf.num_symmetry_functions))    # Dummy Input\n",
    "\n",
    "model_params = model.init(rng, inputs)                             # Initialize the parameters\n",
    "jax.tree.map(lambda x: x.shape, model_params)                      # Check the parameters"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 24,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "Array([[-0.01620077],\n",
       "       [-0.18288607],\n",
       "       [ 0.0744489 ],\n",
       "       [ 0.06749975]], dtype=float64)"
      ]
     },
     "execution_count": 24,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "energies = model.apply(model_params, scaled_descriptor_values)\n",
    "energies # per atom energies"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "### Atomic Potential\n",
    "\n",
    "An atomic potential calculates the energy of a specific element in structures. It forms the basic building block of the final potential, which typically contains multiple elements. Atomic potential bundles up all the necessary components such as descriptors, scalers, and models in order to output the per-atomic energy."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 35,
   "metadata": {},
   "outputs": [],
   "source": [
    "from pantea.potentials.nnp import AtomicPotential"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 26,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "AtomicPotential(\n",
       "  descriptor=AtomCenteredSymmetryFunction(central_element='O', num_symmetry_functions=4),\n",
       "  scaler=DescriptorScaler(transform='_scale_center', scale_range=(0.0, 1.0)),\n",
       "  model=NeuralNetworkModel(hidden_layers=((8, 'tanh'), (8, 'tanh')), dtype=float64),\n",
       ")"
      ]
     },
     "execution_count": 26,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "atomic_potential = AtomicPotential(\n",
    "    descriptor=acsf,\n",
    "    scaler=scaler,\n",
    "    model=model,\n",
    ")\n",
    "\n",
    "atomic_potential"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 27,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "Array([[-0.01620077],\n",
       "       [-0.18288607],\n",
       "       [ 0.0744489 ],\n",
       "       [ 0.06749975]], dtype=float64)"
      ]
     },
     "execution_count": 27,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "energies =  atomic_potential.apply(model_params[\"params\"], scaler_params, structure)\n",
    "energies"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Neural Network Potential\n",
    "\n",
    "An instance of neural network potential (NNP) including descirptor, scaler, and model for multiple elements can be initialzied directly from the input potential files. "
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 28,
   "metadata": {},
   "outputs": [],
   "source": [
    "from pantea.datasets import Dataset\n",
    "from pantea.potentials import NeuralNetworkPotential\n",
    "from ase.visualize import view\n",
    "from pathlib import Path"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "### Read dataset"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 29,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "Structure(natoms=12, elements=('H', 'O'), dtype=float64)"
      ]
     },
     "execution_count": 29,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "base_dir = Path(\".\")\n",
    "\n",
    "# Atomic data\n",
    "structures = Dataset.from_runner(Path(base_dir, \"input.data\"))\n",
    "\n",
    "structure = structures[0]\n",
    "structure"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "### Load potential parameters"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 30,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Potential\n",
    "nnp = NeuralNetworkPotential.from_runner(Path(base_dir, \"input.nn\"))\n",
    "\n",
    "# nnp.save()\n",
    "nnp.load()"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "### Predictions\n",
    "\n",
    "Warm-up period is bacause of the lazy class loading and just-in-time (JIT) compilation."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 31,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "Array(-33.97294888, dtype=float64)"
      ]
     },
     "execution_count": 31,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "total_energy = nnp(structure)\n",
    "total_energy"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 32,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "Array([[-0.00120265, -0.00401054,  0.02040878],\n",
       "       [-0.08127553, -0.05667687,  0.0694481 ],\n",
       "       [ 0.03140665,  0.09825961,  0.0487879 ],\n",
       "       [ 0.08668581, -0.00724281,  0.03676635],\n",
       "       [-0.02163175, -0.0238923 ,  0.01323156]], dtype=float64)"
      ]
     },
     "execution_count": 32,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "forces = nnp.compute_forces(structure)\n",
    "forces[:5]"
   ]
  }
 ],
 "metadata": {
  "interpreter": {
   "hash": "79f793817441a9b4f3336163bce28fb05f5fade87f8f747882a8ea5340683793"
  },
  "kernelspec": {
   "display_name": "Python (pantea)",
   "language": "python",
   "name": "pantea"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.10.14"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 4
}