Source code for MarketEnv

# Define the environment
import gym
import numpy as np
from gym import spaces


# Define the market environment
class MarketEnvironment(gym.Env):
    """A custom environment that simulates a market with two sellers and
    four buyers. Each seller sets a price (the action), and each buyer has
    a certain willingness to pay. The state of the environment consists of
    the two prices set by the sellers and the current willingness to pay of
    each buyer. The episode ends when all buyers have bought the products.
    """

    def __init__(self):
        """Initialize the MarketEnvironment.

        Define action and observation spaces. Initialize state and prices
        history.
        """
        super(MarketEnvironment, self).__init__()
        # Define action and observation space; they must be gym.spaces objects.
        # Each of the two sellers picks an integer price from 0 to 100.
        self.action_space = spaces.MultiDiscrete([101, 101])
        # Observation: the two prices followed by the four buyers'
        # willingness to pay, all in [0, 100].
        self.observation_space = spaces.Box(low=0, high=100, shape=(6,), dtype=np.float32)
        self.prices_history = []
        # Initialize state
        self.reset()
    def step(self, action):
        """Execute one time step within the environment.

        Parameters
        ----------
        action : array_like
            The prices set by the sellers.

        Returns
        -------
        array_like
            The new state of the environment after taking the action.
        float
            The reward achieved by the action.
        bool
            A flag denoting whether the episode has ended.
        dict
            An empty dictionary for additional info (unused in this environment).
        """
        assert self.action_space.contains(action)

        # Simple model: buyers buy from the cheapest seller.
        prices = np.sort(action)
        self.state[0] = prices[0]
        self.state[1] = prices[1]

        # Distribute the buyers: each buyer purchases from the cheapest
        # seller whose price does not exceed their willingness to pay.
        # Compare against the fixed prices (not the mutable state) so that
        # one purchase cannot distort the prices seen by later buyers, and
        # accumulate the sellers' revenue as the reward.
        reward = 0.0
        for i in range(2, 6):
            if self.state[i] >= prices[0]:
                reward += prices[0]
                self.state[i] = 0
            elif self.state[i] >= prices[1]:
                reward += prices[1]
                self.state[i] = 0

        # The episode ends once all buyers have bought the products.
        done = bool(np.sum(self.state[2:]) == 0)

        # Save the prices to history
        self.prices_history.append(action)

        return self.state, reward, done, {}
    def reset(self):
        """Reset the state of the environment to an initial state.

        Returns
        -------
        array_like
            The initial state of the environment.
        """
        # Reset prices to zero and redraw the buyers' willingness to pay
        # uniformly from [0, 100].
        self.state = np.zeros(6, dtype=np.float32)
        self.state[2:6] = np.random.uniform(low=0, high=100, size=4)
        return self.state
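

# Optional registration sketch, not part of the original class: gym's
# registry also accepts a callable entry point, so the environment defined
# above can be registered and then created with gym.make. The id
# "MarketEnv-v0" is an illustrative name, not one defined by this module.
from gym.envs.registration import register

register(
    id="MarketEnv-v0",
    entry_point=MarketEnvironment,
)
# env = gym.make("MarketEnv-v0")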
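

# A minimal usage sketch, not part of the original source: roll out one
# episode with uniformly random prices to sanity-check the environment.
# The `max_steps` cap below is an illustrative assumption; buyers whose
# willingness to pay falls below the cheapest price never purchase, so the
# cap keeps the random-policy demo from looping indefinitely.
if __name__ == "__main__":
    env = MarketEnvironment()
    obs = env.reset()
    total_reward = 0.0
    max_steps = 50
    for t in range(max_steps):
        # Two random integer prices in [0, 100], one per seller.
        action = env.action_space.sample()
        obs, reward, done, info = env.step(action)
        total_reward += reward
        print(f"step {t}: prices={action}, reward={reward:.1f}, "
              f"buyers left={np.count_nonzero(obs[2:])}")
        if done:
            break
    print(f"episode return: {total_reward:.1f}")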