# Source code for aido.interface
import os
from abc import ABC, abstractmethod
from typing import Dict, List, Optional
import numpy as np
import pandas as pd
import torch
from aido.monitoring.logger import WandbLogger
from aido.simulation_helpers import SimulationParameterDictionary
class _UserInterfaceBase(ABC):
    """Private base class underneath the public user interface.

    It only declares two instance attributes (by annotation) and provides a
    do-nothing static hook named ``create_surrogate_dataset``.
    """

    def __init__(self) -> None:
        # Annotation-only declarations: nothing is assigned here, so neither
        # attribute exists on the instance until other code sets it.
        self.wandb_logger: WandbLogger | None
        self.results_dir: str | os.PathLike

    @staticmethod
    def create_surrogate_dataset(
        parameter_dict: SimulationParameterDictionary,
        user_reco_loss: pd.Series | pd.DataFrame | np.ndarray,
        user_context: pd.Series | pd.DataFrame | None = None,
    ):
        """Placeholder hook; this base implementation does no work.

        Args:
            parameter_dict: The simulation parameter dictionary.
            user_reco_loss: Loss values as a pandas or numpy container
                (presumably the user's reconstruction loss; confirm with the caller).
            user_context: Optional pandas container with additional context.

        Returns:
            Always ``None`` in this base implementation.
        """
        return None
# [docs]
class UserInterfaceBase(_UserInterfaceBase):
    """Abstract Base Class for the interface between AIDO and user-defined code.

    These methods must be implemented:

        - simulate
        - merge
        - reconstruct

    Optional methods to implement:

        - constraints
        - plot
        - loss
    """

    @abstractmethod
    def simulate(self, parameter_dict_path: str, sim_output_path: str) -> None:
        """ This method must be implemented

        Starts the simulation process. We recommend starting a container and passing the arguments
        from the command line.

        Args:
            parameter_dict_path (str): The path to the parameter dictionary file.
            sim_output_path (str): The path to save the simulation output.

        Examples:
            To open the parameter dict in your python script (``json.load`` expects an open
            file object, not a path):

            >>> with open(parameter_dict_path) as file:
            ...     parameter_dict = json.load(file)

            Access its items by name and the key 'current_value':

            >>> foo_value = parameter_dict["foo"]["current_value"]

            Use equivalent methods to open JSON files if using C++ or other languages.

        Important:
            The simulation should output exactly one file, which must be saved at 'sim_output_path'.
            You are free to choose the output format of the simulation (e.g. root file).
        """
        raise NotImplementedError

    @abstractmethod
    def merge(
        self,
        parameter_dict_file_paths: List[str],
        simulation_file_paths: List[str],
        reco_input_path: str
    ) -> None:
        """ This method must be implemented

        This method must merge the parameter dicts and the simulation outputs into a single file.
        Its file path will be passed by the scheduler to the 'reconstruct' method as the first
        argument ('reco_input_path').

        Args:
            parameter_dict_file_paths (List[str]): List of the simulation parameter dictionary paths
            simulation_file_paths (List[str]): List of the simulation output paths
            reco_input_path (str): Path for the merged file created by this method.

        Important:
            The output file generated by this method must be a parquet file of a pandas.DataFrame.
            The format of this DataFrame has to be such that these columns exist.

            - df["Parameters"]: The parameters given in the format of
              :meth:`SimulationParameterDictionary.to_df(<length>, display_discrete="as_one_hot")`,
              where `length` is the number of total events.
            - df["Inputs"]: Relevant input information fed to the reconstruction algorithm
            - df["Targets"]: The ground truth fed to the reconstruction algorithm
            - df["Context"]: Additional information (such as Particle ID)

        Example:
            You can easily construct such a DataFrame by using a dict and passing it to pandas. Here
            we will first build the DataFrame for each simulation result and then concatenate them
            together.

            For a single simulation result:

            Load the SimulationParameterDictionary with

            >>> parameter_dict = aido.SimulationParameterDictionary.from_json(parameter_dict_path)
            ... parameter_df = parameter_dict.to_df(<length>, display_discrete="as_one_hot")

            Where length is the total number of events. Now build the dict

            >>> df_combined_dict = {
            ...     "Parameters": parameter_df,
            ...     "Inputs": input_df[input_keys],
            ...     "Targets": input_df[target_keys],
            ...     "Context": input_df[context_keys],
            ... }

            Where input_df is a :class:`pandas.DataFrame` of arbitrary columns and <length> rows
            (e.g. the number of events). This way we can concatenate them in the following step:

            >>> df: pd.DataFrame = pd.concat(
            ...     df_combined_dict.values(),
            ...     keys=df_combined_dict.keys(),
            ...     axis=1,
            ... )

            Finally, we have to concatenate the different simulations together along axis=0 (the
            event axis). Here is one way to do it:

            >>> df_list: list[pd.DataFrame] = []
            ... for simulation_result in simulation_file_paths:
            ...     # Some code that returns the DataFrame for this simulation
            ...     # For example a function f(sim_param_path, sim_input_path) -> df_i
            ...     df_list.append(<df_i>)

            Next, concatenate everything and reset the index, as it will be duplicated otherwise

            >>> df: pd.DataFrame = pd.concat(df_list, axis=0, ignore_index=True)
            ... df = df.reset_index(drop=True)

            Finished! Now simply save this DataFrame to a parquet file (the ``index`` argument of
            ``DataFrame.to_parquet`` is a boolean flag) with:

            >>> df.to_parquet(reco_input_path, index=True)
        """
        raise NotImplementedError

    @abstractmethod
    def reconstruct(self, reco_input_path: str, reco_output_path: str, is_validation: bool = False) -> None:
        """ This method must be implemented

        Start your reconstruction algorithm here. We recommend using a container and starting the
        reconstruction from the command line.

        Args:
            reco_input_path (str): Path of the input file for your reconstruction process. It is the
                same path as the output of the 'merge' method.
            reco_output_path (str): Path of the output file generated by your reconstruction process.
                Since this file interfaces with the AIDO Optimizer, it must have the specific format
                that the Optimizer expects.
            is_validation (bool): Useful to define a distinct behavior for regular reconstruction and
                for evaluation.
        """
        # NOTE(review): the required layout of the file at 'reco_output_path' is announced but
        # never spelled out in this docstring; it should be documented here.
        raise NotImplementedError

    def constraints(
        self,
        parameter_dict: SimulationParameterDictionary,
        parameter_dict_as_tensor: Dict[str, torch.Tensor]
    ) -> None | torch.Tensor:
        """ This method is optional

        Use this method to compute additional constraints such as cost or dimensions using pytorch.
        The resulting Tensor must be one-dimensional and include gradients.

        Args:
            parameter_dict (SimulationParameterDictionary): The simulation parameter dictionary.
            parameter_dict_as_tensor (Dict[str, torch.Tensor]): Mapping from string keys to
                tensors (presumably the same parameters in tensor form; confirm with the caller).

        Returns:
            None in this default implementation; an overriding method may return a
            :class:`torch.Tensor` instead.
        """
        return None

    def plot(self, parameter_dict: SimulationParameterDictionary) -> None:
        """ This method is optional

        Use this method to execute code after each iteration. This can be anything used to track the
        progress of the Optimization process. Errors that occur during the execution of this function
        are caught automatically and displayed as warnings.

        Args:
            parameter_dict (SimulationParameterDictionary): The simulation parameter dictionary.
        """
        return None

    def loss(self, y: torch.Tensor, y_pred: torch.Tensor) -> torch.Tensor:
        """ This method is optional

        Use this method to compute the loss of the internal Optimizer. This must be an equivalent
        implementation to your reconstruction loss.

        Args:
            y (torch.Tensor): The target values.
            y_pred (torch.Tensor): The predicted values.

        Raises:
            NotImplementedError: Always, unless this method is overridden in a subclass.
        """
        raise NotImplementedError