# random_policy.py

"""
Module to show the example use case of the AntiPoachingGame 
environment. This program simulates a random strategy for 
each player.
"""

import sys
from anti_poaching.anti_poaching_v0 import anti_poaching

def all_agents_done(terminations: dict, truncations: dict) -> bool:
    """Return True when every agent is terminated or truncated.

    The two flags are looked up by agent key rather than paired by position,
    so the result does not depend on both dictionaries sharing the same
    insertion order.

    Args:
        terminations: Mapping from agent to its "terminated" flag.
        truncations: Mapping from agent to its "truncated" flag. An agent
            missing from this mapping is treated as not truncated.

    Returns:
        True if each agent in ``terminations`` has at least one of the two
        flags set (vacuously True when ``terminations`` is empty).
    """
    return all(
        terminated or truncations.get(agent, False)
        for agent, terminated in terminations.items()
    )


def main() -> None:
    """Play one game in which every agent samples its actions at random.

    The render mode is read from the first command-line argument and
    defaults to ``"rgb"``. Each step prints the chosen actions and the
    agents' states before and after the step, then renders the environment.
    Once every agent is done, the timing information and the total rewards
    are printed.
    """
    render_mode = sys.argv[1] if len(sys.argv) > 1 else "rgb"
    cg = anti_poaching.parallel_env(grid_size=10, render_mode=render_mode)
    # NOTE(review): the environment is stepped without an explicit reset();
    # confirm that constructing it is enough to initialise agents and grid.

    # No observation exists before the first step, so the initial masks are
    # queried from the grid directly.
    action_mask = {
        agent: cg.grid.permitted_movements(agent) for agent in cg.agents
    }

    print("GAME BEGIN" + "-" * 40)
    done = False
    while not done:
        # sample the actions for each agent randomly
        print(action_mask)
        actions = {
            agent: cg.action_space(agent).sample(mask=action_mask[agent])
            for agent in cg.agents
        }
        # step through the environment
        print("PRE-STEP:")
        for agent in cg.agents:
            print(f"\t{agent} is in old state:{cg.grid.state[agent]}")
            print(f"\t{agent} has chosen action:{actions[agent]}\n")

        print("STEP: ")
        observations, _, terminations, truncations, _ = cg.step(actions)
        print()

        # update the possible actions for each agent; from now on the mask
        # is read from each agent's observation
        action_mask = {
            agent: observations[agent]["action_mask"] for agent in cg.agents
        }

        # post-processing: find out if the game is over
        # and give relevant outputs.
        done = all_agents_done(terminations, truncations)
        print("POST-STEP:")
        for agent in cg.agents:
            print(f"\t{agent} is in new state:{cg.grid.state[agent]}")
        print()

        cg.render()
        print("-" * 40)

    print("\n GAME OVER !\n")
    print(cg.curr_time, " is the current time")
    print(cg.max_time, " is the maximum time")
    print("Rewards: ")
    # possible_agents is used here rather than agents, which drives the loop
    for agent in cg.possible_agents:
        print(agent, " has ", cg.total_rewards[agent])


if __name__ == "__main__":
    main()