# Source code for MarketEnv

# Define the environment
import gym
import numpy as np
from gym import spaces


# Define the market environment
class MarketEnvironment(gym.Env):
    """A toy posted-price market with two sellers and four buyers.

    On every step each seller posts an integer price in ``[0, 100]`` (the
    action). Every buyer who has not bought yet and whose willingness to pay
    is at least the cheapest posted price buys one unit from the cheapest
    seller and pays that price. Because a buyer who can afford the dearer
    offer can always afford the cheaper one, the dearer seller makes no sale
    in this simple model.

    The observation is a length-6 ``float32`` vector:

    * ``state[0]`` -- the lower of the two posted prices,
    * ``state[1]`` -- the higher of the two posted prices,
    * ``state[2:6]`` -- each buyer's willingness to pay, set to 0 once that
      buyer has bought.

    The episode ends when all buyers have bought the product.
    """

    _N_SELLERS = 2
    _N_BUYERS = 4
    _MAX_PRICE = 100

    def __init__(self):
        """Initialize the MarketEnvironment.

        Define the action and observation spaces, create the (initially
        empty) price history and draw the first set of buyers.
        """
        super().__init__()

        # One discrete price in {0, ..., 100} per seller.
        self.action_space = spaces.MultiDiscrete(
            [self._MAX_PRICE + 1] * self._N_SELLERS
        )

        # Two prices followed by four willingness-to-pay values, all within
        # [0, 100]. float32 is Box's default dtype; it is spelled out here
        # because the state array is allocated with the same dtype so that
        # observations really are members of this space.
        self.observation_space = spaces.Box(
            low=0,
            high=self._MAX_PRICE,
            shape=(self._N_SELLERS + self._N_BUYERS,),
            dtype=np.float32,
        )

        # Every action passed to ``step``, in call order. ``reset`` does not
        # clear it, so it spans all episodes played on this instance.
        self.prices_history = []

        # Initialize state
        self.reset()

    def step(self, action):
        """Execute one time step within the environment.

        Parameters
        ----------
        action : array_like
            The prices set by the two sellers; must be a member of
            ``self.action_space``.

        Returns
        -------
        numpy.ndarray
            A copy of the new state: the two posted prices in ascending
            order followed by the buyers' remaining willingness to pay.
        float
            The revenue collected in this step, i.e. the cheapest posted
            price times the number of buyers who bought.
        bool
            ``True`` once every buyer has bought the product.
        dict
            An empty dictionary for additional info (unused in this
            environment).

        Raises
        ------
        AssertionError
            If ``action`` is not contained in ``self.action_space``.
        """
        # Raised explicitly instead of via a bare ``assert`` so the check
        # still runs under ``python -O``; the exception type is unchanged.
        if not self.action_space.contains(action):
            raise AssertionError(f"{action!r} is not a valid action")

        # Private copy: the caller may reuse or mutate its action buffer, and
        # the same array is stored in the history below.
        posted = np.array(action, copy=True)
        low_price, high_price = np.sort(posted)

        # The price slots hold the posted prices only. Revenue is tracked
        # separately so the observation stays inside the declared bounds and
        # buyers are always compared against the real price.
        self.state[0] = low_price
        self.state[1] = high_price

        # ``wtp`` is a view into ``self.state``, so zeroing entries below
        # updates the state in place.
        wtp = self.state[self._N_SELLERS:]
        buys = (wtp > 0) & (wtp >= low_price)

        # Each purchasing buyer pays the posted price of the cheapest seller.
        reward = float(low_price) * int(np.count_nonzero(buys))

        # A buyer who has bought leaves the market.
        wtp[buys] = 0

        # Set done flag if all buyers have bought the products
        done = not wtp.any()

        # Save the prices to history
        self.prices_history.append(posted)

        # Hand out a copy so observations the caller has stored are not
        # silently rewritten by later steps.
        return self.state.copy(), reward, done, {}

    def reset(self):
        """Reset the state of the environment to an initial state.

        Both price slots are set to 0 and each buyer's willingness to pay is
        drawn uniformly from ``[0, 100)`` using NumPy's global random state.
        The price history is left untouched.

        Returns
        -------
        numpy.ndarray
            A copy of the initial state of the environment.
        """
        self.state = np.zeros(
            self._N_SELLERS + self._N_BUYERS, dtype=np.float32
        )
        # Initialize buyers' willingness to pay
        self.state[self._N_SELLERS:] = np.random.uniform(
            low=0, high=self._MAX_PRICE, size=self._N_BUYERS
        )
        return self.state.copy()