Source code for aido.interface

import os
from abc import ABC, abstractmethod
from typing import Dict, List, Optional

import numpy as np
import pandas as pd
import torch

from aido.monitoring.logger import WandbLogger
from aido.simulation_helpers import SimulationParameterDictionary


class _UserInterfaceBase(ABC):
    """Private base class declaring the attributes and hook shared by user interfaces.

    The attributes ``results_dir`` and ``wandb_logger`` are only annotated in
    ``__init__`` and never assigned there, so a freshly constructed instance does
    not carry them yet.

    NOTE(review): presumably the surrounding framework assigns both attributes
    before they are read -- confirm against the caller.
    """

    def __init__(self) -> None:
        # Bare annotations: they declare the expected types without binding a value.
        self.results_dir: str | os.PathLike
        self.wandb_logger: WandbLogger | None

    @staticmethod
    def create_surrogate_dataset(
        parameter_dict: SimulationParameterDictionary,
        user_reco_loss: pd.Series | pd.DataFrame | np.ndarray,
        user_context: pd.Series | pd.DataFrame | None = None,
    ):
        """Placeholder hook: accepts its arguments, does nothing and returns None.

        NOTE(review): looks like a stub meant to be replaced by a real
        implementation elsewhere -- confirm.
        """
        return None


class UserInterfaceBase(_UserInterfaceBase):
    """Abstract Base Class for the interface between AIDO and user-defined code.

    These methods must be implemented:
        - simulate
        - merge
        - reconstruct

    Optional methods to implement:
        - constraints
        - plot
        - loss
    """

    @abstractmethod
    def simulate(self, parameter_dict_path: str, sim_output_path: str) -> None:
        """This method must be implemented

        Starts the simulation process. We recommend starting a container and passing the
        arguments from the command line.

        Args:
            parameter_dict_path (str): The path to the parameter dictionary file.
            sim_output_path (str): The path to save the simulation output.

        Examples:
            To open the parameter dict in your python script:

            >>> with open(parameter_dict_path) as file:
            ...     parameter_dict = json.load(file)

            Access its items by name and the key 'current_value':

            >>> foo_value = parameter_dict["foo"]["current_value"]

            Use equivalent methods to open JSON files if using C++ or other languages.

        Important:
            The simulation should output exactly one file, which must be saved at
            'sim_output_path'. You are free to choose the output format of the simulation
            (e.g. root file)

        Raises:
            NotImplementedError: Always, in this base implementation.
        """
        raise NotImplementedError

    @abstractmethod
    def merge(
            self,
            parameter_dict_file_paths: List[str],
            simulation_file_paths: List[str],
            reco_input_path: str
            ) -> None:
        """This method must be implemented

        This method must merge the parameter dicts and the simulation outputs into a single
        file. Its file path will be passed by the scheduler to the 'reconstruct' method as the
        first argument ('reco_input_path'). You are free to choose the file format of
        'reco_input_path'.

        Args:
            parameter_dict_file_paths (List[str]): List of the simulation parameter dictionary
                paths
            simulation_file_paths (List[str]): List of the simulation output paths
            reco_input_path (str): Path for the merged file created by this method.

        Important:
            The output file generated by this method must be a parquet file of a
            pandas.DataFrame. The format of this DataFrame has to be such that these columns
            exist.

            - df["Parameters"]: The parameters given in the format of
              :meth:`SimulationParameterDictionary.to_df(<length>, display_discrete="as_one_hot")`,
              where `length` is the number of total events.
            - df["Inputs"]: Relevant input information fed to the reconstruction algorithm
            - df["Targets"]: The ground truth fed to the reconstruction algorithm
            - df["Context"]: Additional information (such as Particle ID)

        Example:
            You can easily construct such a DataFrame by using a dict and passing it to pandas.
            Here we will first build the DataFrame for each simulation result and then
            concatenate them together. For a single simulation result:

            Load the SimulationParameterDictionary with

            >>> parameter_dict = aido.SimulationParameterDictionary.from_json(parameter_dict_path)
            ... parameter_df = parameter_dict.to_df(<length>, display_discrete="as_one_hot")

            Where length is the total number of events. Now build the dict

            >>> df_combined_dict = {
            ...     "Parameters": parameter_df,
            ...     "Inputs": input_df[input_keys],
            ...     "Targets": input_df[target_keys],
            ...     "Context": input_df[context_keys],
            ... }

            Where input_df is a :class:`pandas.DataFrame` of arbitrary columns and <length>
            rows (e.g. the number of events). This way we can concatenate them in the
            following step:

            >>> df: pd.DataFrame = pd.concat(
            ...     df_combined_dict.values(),
            ...     keys=df_combined_dict.keys(),
            ...     axis=1,
            ... )

            Finally, we have to concatenate the different simulations together along axis=0
            (the event axis). Here is one way to do it:

            >>> df_list: list[pandas.DataFrame] = []
            ... for simulation_result in simulation_file_paths:
            ...     # Some code that returns the DataFrame for this simulation
            ...     # For example a function f(sim_param_path, sim_input_path) -> df_i
            ...     df_list.append(<df_i>)

            Next, concatenate everything and reset the index, as it will be duplicated
            otherwise

            >>> df: pd.DataFrame = pd.concat(df_list, axis=0, ignore_index=True)
            ... df = df.reset_index(drop=True)

            Finished! Now simply save this DataFrame to a parquet file with:

            >>> df.to_parquet(reco_input_path, index=range(len(df)))

        Raises:
            NotImplementedError: Always, in this base implementation.
        """
        raise NotImplementedError

    @abstractmethod
    def reconstruct(self, reco_input_path: str, reco_output_path: str, is_validation: bool = False) -> None:
        """This method must be implemented

        Start your reconstruction algorithm here. We recommend using a container and starting
        the reconstruction from the command line.

        Args:
            reco_input_path (str): Path of the input file for your reconstruction process. It
                is the same path as the output of the 'merge' method.
            reco_output_path (str): Path of the output file generated by your reconstruction
                process. Since this file interfaces with the AIDO Optimizer, it must have a
                specific format detailed in the following.
            is_validation (bool): Useful to define a distinct behavior for regular
                reconstruction and for evaluation.

        Raises:
            NotImplementedError: Always, in this base implementation.
        """
        raise NotImplementedError

    def constraints(
            self,
            parameter_dict: SimulationParameterDictionary,
            parameter_dict_as_tensor: Dict[str, torch.Tensor]
            ) -> None | torch.Tensor:
        """This method is optional

        Use this method to compute additional constraints such as cost or dimensions using
        pytorch. The resulting Tensor must be one-dimensional and include gradients.

        Returns:
            None in this base implementation.
        """
        return None

    def plot(self, parameter_dict: SimulationParameterDictionary) -> None:
        """This method is optional

        Use this method to execute code after each iteration. This can be anything used to
        track the progress of the Optimization process. Errors that occur during the execution
        of this function will be automatically excepted and displayed as warnings.

        Returns:
            None in this base implementation.
        """
        return None

    def loss(self, y: torch.Tensor, y_pred: torch.Tensor) -> torch.Tensor:
        """This method is optional

        Use this method to compute the loss of the internal Optimizer. This must be an
        equivalent implementation to your reconstruction loss.

        Raises:
            NotImplementedError: Always, in this base implementation.
        """
        raise NotImplementedError