Source code for llmize.base

import functools
import multiprocessing as mp
from typing import List, Optional, Callable, Dict, Any, Union
from .config import get_config
from .utils.parsing import parse_pairs
from .llm.llm_call import generate_content
from .llm.llm_init import initialize_llm
from .utils.logger import log_info, log_critical, log_debug, log_warning, log_error
from .utils.parsing import parse_response
from .utils.decorators import check_init, time_it
from .callbacks import EarlyStopping, AdaptTempOnPlateau

[docs] class OptimizationResult: """ A container class for storing and accessing the results of an optimization process. This class provides a structured way to access all relevant information from an optimization run, including the best solution found, its score, and the complete optimization history. Attributes: best_solution: The best solution found during optimization. Can be any type depending on the problem (string, list, tuple, etc.) best_score: The score of the best solution (float) best_score_history: Complete list of best scores at each step best_score_per_step: List of best scores achieved in each batch avg_score_per_step: List of average scores for each batch Example: >>> result = optimizer.minimize(...) >>> print(f"Best solution: {result.best_solution}") >>> print(f"Best score: {result.best_score}") >>> # Convert to dictionary for saving >>> result_dict = result.to_dict() """
[docs] def __init__(self, best_solution: Any, best_score: float, best_score_history: List[float], best_score_per_step: List[float], avg_score_per_step: List[float]): self.best_solution = best_solution self.best_score = best_score self.best_score_history = best_score_history self.best_score_per_step = best_score_per_step self.avg_score_per_step = avg_score_per_step
[docs] def to_dict(self) -> Dict[str, Any]: """Convert the result to a dictionary format.""" return { "best_solution": self.best_solution, "best_score": self.best_score, "best_score_history": self.best_score_history, "best_score_per_step": self.best_score_per_step, "avg_score_per_step": self.avg_score_per_step }
[docs] class Optimizer: """ Base class for all LLM-based optimizers in LLMize. This class provides the common interface and functionality for all optimization methods. It handles configuration management, LLM initialization, and provides the standard maximize/minimize interface that all optimizers must implement. All optimizer classes (OPRO, ADOPRO, HLMEA, HLMSA) inherit from this base class. Attributes: problem_text (str): Text description of the optimization problem obj_func (callable): Objective function to evaluate solutions llm_model (str): Name of the LLM model to use api_key (str): API key for the LLM service Note: This class should not be used directly. Instead, use one of the specific optimizer implementations (OPRO, ADOPRO, HLMEA, HLMSA). """
[docs] def __init__(self, problem_text=None, obj_func=None, llm_model=None, api_key=None): """ Initialize the Optimizer with the general configuration. Args: problem_text (str, optional): A natural language description of the optimization problem. This helps the LLM understand the context. If None, must be provided before optimization. obj_func (callable, optional): The objective function to optimize. Should accept a solution and return a numerical score. If None, must be provided before optimization. llm_model (str, optional): Name of the LLM model to use. If None, uses the default from configuration. api_key (str, optional): API key for the LLM service. If None, will look for environment variable or config setting. """ config = get_config() self.problem_text = problem_text self.obj_func = obj_func self.llm_model = llm_model if llm_model is not None else config.default_model self.api_key = api_key
@check_init @time_it def maximize(self, init_samples=None, init_scores=None, num_steps=None, batch_size=None, temperature=None, callbacks=None, verbose=1, parallel_n_jobs=None): """ Run the optimization algorithm to maximize the objective function. Parameters: - init_samples (list): A list of initial solutions. - init_scores (list): A list of initial scores corresponding to init_samples. - num_steps (int): The number of optimization steps (default from config). - batch_size (int): The batch size used for optimization (default from config). - temperature (float): The temperature for the LLM model (default from config). - callbacks (list): A list of callback functions to be triggered at the end of each step. - verbose (int): The verbosity level (default: 1). - parallel_n_jobs (int): The number of parallel jobs for evaluation (default from config). Returns: - results (OptimizationResult): An object containing the optimization results. """ config = get_config() # Use config defaults if not provided if num_steps is None: num_steps = config.default_num_steps if batch_size is None: batch_size = config.default_batch_size if temperature is None: temperature = config.temperature if parallel_n_jobs is None: parallel_n_jobs = config.parallel_n_jobs return self.optimize(init_samples=init_samples, init_scores=init_scores, num_steps=num_steps, batch_size=batch_size, temperature=temperature, callbacks=callbacks, verbose=verbose, optimization_type="maximize", parallel_n_jobs=parallel_n_jobs) @check_init @time_it def minimize(self, init_samples=None, init_scores=None, num_steps=None, batch_size=None, temperature=None, callbacks=None, verbose=1, parallel_n_jobs=None): """ Run the optimization algorithm to minimize the objective function. Parameters: - init_samples (list): A list of initial solutions. - init_scores (list): A list of initial scores corresponding to init_samples. - num_steps (int): The number of optimization steps (default from config). - batch_size (int): The batch size used for optimization (default from config). - temperature (float): The temperature for the LLM model (default from config). - callbacks (list): A list of callback functions to be triggered at the end of each step. - verbose (int): The verbosity level (default: 1). - parallel_n_jobs (int): The number of parallel jobs for evaluation (default from config). Returns: - results (dict): A dictionary containing the best solution, best score, best score history, best score per step, and average score per step. """ config = get_config() # Use config defaults if not provided if num_steps is None: num_steps = config.default_num_steps if batch_size is None: batch_size = config.default_batch_size if temperature is None: temperature = config.temperature if parallel_n_jobs is None: parallel_n_jobs = config.parallel_n_jobs return self.optimize(init_samples=init_samples, init_scores=init_scores, num_steps=num_steps, batch_size=batch_size, temperature=temperature, callbacks=callbacks, verbose=verbose, optimization_type="minimize", parallel_n_jobs=parallel_n_jobs)
[docs] def get_configuration(self): """ Returns the general configuration of the optimizer. """ return { "llm_model": self.llm_model, "problem_text": self.problem_text, "obj_func": (self.obj_func.func.__name__ if isinstance(self.obj_func, functools.partial) else self.obj_func.__name__) if self.obj_func else None }
[docs] def meta_prompt(self, batch_size, example_pairs, optimization_type="maximize"): """ Dummy function for meta_prompt. Should be overridden by subclasses. Parameters: - batch_size (int): Number of new solutions to generate. - example_pairs (str): Example solutions and scores. - optimization_type (str): "maximize" or "minimize" (default: "maximize"). Returns: - prompt (str): A formatted prompt structure. """ return "prompt"
[docs] def get_sample_prompt(self, batch_size=None, optimization_type="maximize", init_samples=None, init_scores=None): """ Generate a sample prompt for the language model based on the provided parameters. Parameters: - batch_size (int): The number of new solutions to generate (default from config). - optimization_type (str): The type of optimization to perform, either "maximize" or "minimize" (default: "maximize"). - init_samples (list): A list of initial solutions (default: None). - init_scores (list): A list of initial scores corresponding to init_samples (default: None). Returns: - str: The generated prompt as a string to be used for generating solutions from the language model. """ if batch_size is None: batch_size = get_config().default_batch_size example_pairs = parse_pairs(init_samples, init_scores) prompt = self.meta_prompt(batch_size=batch_size, example_pairs=example_pairs, optimization_type=optimization_type) return prompt
[docs] def get_sample_response(self, prompt): """ Generate a response from the language model using the provided prompt. Parameters: - prompt (str): The prompt to be used for generating the response. Returns: - str: The generated content as a string from the language model. """ client = initialize_llm(self.llm_model, self.api_key) return generate_content(client, self.llm_model, prompt)
[docs] def _generate_solutions(self, client, prompt, temperature, batch_size, verbose, max_retries=5, hp_parse=False): """ Generate solutions by retrying content generation until the solution array has the expected batch size. Parameters: client: The client instance to be passed to the content generation function. llm_model: The language model to be used for generating content. prompt: The prompt to send to the model. temperature: The temperature parameter for content generation. batch_size: Expected number of solutions. verbose: Verbosity level for debug logging. max_retries: Maximum number of retry attempts. hp_parse: Whether to parse the hyperparameters from the response. Returns: A list of solutions that matches the expected batch_size. Raises: ValueError: If the expected number of solutions cannot be generated after max_retries. """ response = generate_content(client, self.llm_model, prompt, temperature) if hp_parse: solution_array, hp = parse_response(response, hp_parse) hp_none = hp is None else: solution_array = parse_response(response, hp_parse) hp_none = False if verbose > 2: #log_debug(f"Prompt: {prompt}") log_debug(f"Response: {response}") if verbose > 1: log_debug(f"Generated Solutions: {solution_array}") # Always show the response in debug mode to troubleshoot HLMSA issues if verbose >= 1 and solution_array is None: log_error(f"Failed to parse solutions. LLM response was: {response}") retry = 0 while solution_array is None or len(solution_array) < batch_size or (hp_parse and hp_none): log_warning("Number of solutions parsed is less than batch size. Retrying...") response = generate_content(client, self.llm_model, prompt, temperature) if hp_parse: solution_array, hp = parse_response(response, hp_parse) hp_none = hp is None else: solution_array = parse_response(response, hp_parse) hp_none = False if verbose > 2: log_debug(f"Response for retry {retry+1}: {response}") if verbose > 1: log_debug(f"Generated Solutions for retry {retry+1}: {solution_array}") retry += 1 if retry >= max_retries: log_critical("Failed to generate solutions after multiple attempts.") raise ValueError("Failed to generate solutions after multiple attempts.") if len(solution_array) > batch_size: log_warning(f"Number of solutions parsed is greater than batch size. Removing extra solutions.") # Remove first extra solutions solution_array = solution_array[:batch_size] if hp_parse: return solution_array, hp else: return solution_array
[docs] def _evaluate_solutions(self, solution_array, best_solution, optimization_type, verbose, best_score=None, parallel_n_jobs=None): """ Evaluate a list of solutions and update the best solution based on an objective function. Parameters: solution_array (list): A list of solutions to evaluate. optimization_type (str): "maximize" or "minimize". verbose (int): Verbosity level for logging. obj_func (callable): A function that takes a solution and returns its score. best_score (float, optional): The current best score. If not provided, it will be initialized based on the optimization_type. parallel_n_jobs (int): Number of CPU cores to use for parallel evaluation. If 1 (default), uses sequential processing. If >1, uses parallel processing with specified number of cores. If -1, uses all available cores. Returns: tuple: (best_score, best_solution, step_scores, best_step_score) best_score (float): The updated best score after evaluating solutions. best_solution (any): The solution corresponding to the best score. step_scores (list): A list of scores for each solution in solution_array. best_step_score (float): The best score in the current batch. Raises: ValueError: If optimization_type is not "maximize" or "minimize". """ self.optimization_type = optimization_type # Store for _evaluate_single_solution # Use config default if parallel_n_jobs is None if parallel_n_jobs is None: parallel_n_jobs = get_config().parallel_n_jobs if optimization_type == "maximize": best_step_score = -float('inf') # Start with the lowest possible value for maximization if best_score is None: best_score = -float('inf') elif optimization_type == "minimize": best_step_score = float('inf') # Start with the highest possible value for minimization if best_score is None: best_score = float('inf') else: raise ValueError("optimization_type must be 'maximize' or 'minimize'") # Evaluate solutions either in parallel or sequentially if parallel_n_jobs != 1: try: # If parallel_n_jobs is -1, use all available cores if parallel_n_jobs == -1: parallel_n_jobs = mp.cpu_count() # Ensure parallel_n_jobs is at least 1 parallel_n_jobs = max(1, min(parallel_n_jobs, mp.cpu_count())) if verbose > 1: log_debug(f"Using {parallel_n_jobs} CPU cores for parallel evaluation") with mp.Pool(processes=parallel_n_jobs) as pool: step_scores = pool.map(self._evaluate_single_solution, solution_array) except Exception as e: log_error(f"Parallel evaluation failed, falling back to sequential: {e}") step_scores = [self._evaluate_single_solution(solution) for solution in solution_array] else: step_scores = [self._evaluate_single_solution(solution) for solution in solution_array] # Process results for i, score in enumerate(step_scores): if verbose > 1: log_debug(f"Score for solution {solution_array[i]}: {score}") # Update best step score for the current batch if (optimization_type == "maximize" and score > best_step_score) or \ (optimization_type == "minimize" and score < best_step_score): best_step_score = score # Update the overall best score and solution if found if (optimization_type == "maximize" and score > best_score) or \ (optimization_type == "minimize" and score < best_score): best_score = score best_solution = solution_array[i] return best_score, best_solution, step_scores, best_step_score
[docs] def _evaluate_single_solution(self, solution): """ Evaluate a single solution using the objective function. Parameters: solution: The solution to evaluate Returns: float: The score of the solution """ try: return self.obj_func(solution) except Exception as e: log_error(f"Error occurred while evaluating solution {solution}: {e}") return float('inf') if self.optimization_type == "minimize" else float('-inf')
[docs] def _initialize_callbacks(self,callbacks, temperature): """ Initialize wait counter and temperature for callbacks if early stopping or adaptive temperature is used. Parameters: - callbacks (list): A list of callback functions. - temperature (float): The initial temperature for the LLM model. """ if callbacks: for callback in callbacks: if isinstance(callback, EarlyStopping): callback.wait = 0 if isinstance(callback, AdaptTempOnPlateau): callback.temperature = temperature callback.wait = 0