From 75c839bfb80e1632aa8a5aaaad8bedff69d5319a Mon Sep 17 00:00:00 2001 From: Simon Bjurek <simbj106@student.liu.se> Date: Wed, 19 Feb 2025 15:38:53 +0000 Subject: [PATCH] Add IO times for list schedulers --- b_asic/core_schedulers.py | 13 +- b_asic/resources.py | 2 +- b_asic/schedule.py | 4 +- b_asic/scheduler.py | 147 +++- b_asic/sfg_generators.py | 4 +- .../auto_scheduling_with_custom_io_times.py | 85 +++ examples/ldlt_matrix_inverse.py | 28 +- examples/memory_constrained_scheduling.py | 134 ++++ test/test_core_schedulers.py | 642 +++++++++++++++++- 9 files changed, 1026 insertions(+), 33 deletions(-) create mode 100644 examples/auto_scheduling_with_custom_io_times.py create mode 100644 examples/memory_constrained_scheduling.py diff --git a/b_asic/core_schedulers.py b/b_asic/core_schedulers.py index 1b5b4f18..32cb23db 100644 --- a/b_asic/core_schedulers.py +++ b/b_asic/core_schedulers.py @@ -124,7 +124,8 @@ class EarliestDeadlineScheduler(ListScheduler): deadlines = {} for op_id, start_time in schedule_copy.start_times.items(): - deadlines[op_id] = start_time + schedule.sfg.find_by_id(op_id).latency + if not op_id.startswith("in"): + deadlines[op_id] = start_time + schedule.sfg.find_by_id(op_id).latency return sorted(deadlines, key=deadlines.get) @@ -137,7 +138,10 @@ class LeastSlackTimeScheduler(ListScheduler): schedule_copy = copy.copy(schedule) ALAPScheduler().apply_scheduling(schedule_copy) - return sorted(schedule_copy.start_times, key=schedule_copy.start_times.get) + sorted_ops = sorted( + schedule_copy.start_times, key=schedule_copy.start_times.get + ) + return [op for op in sorted_ops if not op.startswith("in")] class MaxFanOutScheduler(ListScheduler): @@ -152,7 +156,8 @@ class MaxFanOutScheduler(ListScheduler): for op_id, start_time in schedule_copy.start_times.items(): fan_outs[op_id] = len(schedule.sfg.find_by_id(op_id).output_signals) - return sorted(fan_outs, key=fan_outs.get, reverse=True) + sorted_ops = sorted(fan_outs, key=fan_outs.get, reverse=True) + return [op for op in sorted_ops if not op.startswith("in")] class HybridScheduler(ListScheduler): @@ -199,4 +204,4 @@ class HybridScheduler(ListScheduler): sorted_op_list = [pair[0] for pair in fan_out_sorted_items] - return sorted_op_list + return [op for op in sorted_op_list if not op.startswith("in")] diff --git a/b_asic/resources.py b/b_asic/resources.py index 97dffb68..bc850f1f 100644 --- a/b_asic/resources.py +++ b/b_asic/resources.py @@ -907,7 +907,7 @@ class ProcessCollection: def split_on_ports( self, - heuristic: str = "left_edge", + heuristic: str = "graph_color", read_ports: Optional[int] = None, write_ports: Optional[int] = None, total_ports: Optional[int] = None, diff --git a/b_asic/schedule.py b/b_asic/schedule.py index 6cfabb17..6e231c2c 100644 --- a/b_asic/schedule.py +++ b/b_asic/schedule.py @@ -119,9 +119,9 @@ class Schedule: self._remove_delays_no_laps() max_end_time = self.get_max_end_time() - if schedule_time is None: + if not self._schedule_time: self._schedule_time = max_end_time - elif schedule_time < max_end_time: + elif self._schedule_time < max_end_time: raise ValueError(f"Too short schedule time. Minimum is {max_end_time}.") def __str__(self) -> str: diff --git a/b_asic/scheduler.py b/b_asic/scheduler.py index 4f1a58f2..c4d5de35 100644 --- a/b_asic/scheduler.py +++ b/b_asic/scheduler.py @@ -1,3 +1,4 @@ +import sys from abc import ABC, abstractmethod from typing import TYPE_CHECKING, Optional, cast @@ -9,6 +10,7 @@ from b_asic.types import TypeName if TYPE_CHECKING: from b_asic.operation import Operation from b_asic.schedule import Schedule + from b_asic.signal_flow_graph import SFG from b_asic.types import GraphID @@ -44,9 +46,21 @@ class Scheduler(ABC): ] + cast(int, source_port.latency_offset) +# TODO: Rename max_concurrent_reads/writes to max_concurrent_read_ports or something to signify difference + + class ListScheduler(Scheduler, ABC): - def __init__(self, max_resources: Optional[dict[TypeName, int]] = None) -> None: - if max_resources: + def __init__( + self, + max_resources: Optional[dict[TypeName, int]] = None, + max_concurrent_reads: Optional[int] = None, + max_concurrent_writes: Optional[int] = None, + input_times: Optional[dict["GraphID", int]] = None, + output_delta_times: Optional[dict["GraphID", int]] = None, + cyclic: Optional[bool] = False, + ) -> None: + super() + if max_resources is not None: if not isinstance(max_resources, dict): raise ValueError("max_resources must be a dictionary.") for key, value in max_resources.items(): @@ -54,12 +68,20 @@ class ListScheduler(Scheduler, ABC): raise ValueError("max_resources key must be a valid type_name.") if not isinstance(value, int): raise ValueError("max_resources value must be an integer.") - - if max_resources: self._max_resources = max_resources else: self._max_resources = {} + self._max_concurrent_reads = ( + max_concurrent_reads if max_concurrent_reads else sys.maxsize + ) + self._max_concurrent_writes = ( + max_concurrent_writes if max_concurrent_writes else sys.maxsize + ) + + self._input_times = input_times if input_times else {} + self._output_delta_times = output_delta_times if output_delta_times else {} + def apply_scheduling(self, schedule: "Schedule") -> None: """Applies the scheduling algorithm on the given Schedule. @@ -69,40 +91,63 @@ class ListScheduler(Scheduler, ABC): Schedule to apply the scheduling algorithm on. """ sfg = schedule.sfg - start_times = schedule.start_times used_resources_ready_times = {} remaining_resources = self._max_resources.copy() sorted_operations = self._get_sorted_operations(schedule) - # place all inputs at time 0 + schedule.start_times = {} + + remaining_reads = self._max_concurrent_reads + + # initial input placement + if self._input_times: + for input_id in self._input_times: + schedule.start_times[input_id] = self._input_times[input_id] + for input_op in sfg.find_by_type_name(Input.type_name()): - start_times[input_op.graph_id] = 0 + if input_op.graph_id not in self._input_times: + schedule.start_times[input_op.graph_id] = 0 current_time = 0 + timeout_counter = 0 while sorted_operations: # generate the best schedulable candidate candidate = sfg.find_by_id(sorted_operations[0]) counter = 0 while not self._candidate_is_schedulable( - start_times, + schedule.start_times, + sfg, candidate, current_time, remaining_resources, + remaining_reads, + self._max_concurrent_writes, sorted_operations, ): if counter == len(sorted_operations): counter = 0 current_time += 1 + timeout_counter += 1 + + if timeout_counter > 10: + msg = "Algorithm did not schedule any operation for 10 time steps, try relaxing constraints." + raise TimeoutError(msg) + + remaining_reads = self._max_concurrent_reads + # update available operators for operation, ready_time in used_resources_ready_times.items(): if ready_time == current_time: remaining_resources[operation.type_name()] += 1 + else: candidate = sfg.find_by_id(sorted_operations[counter]) counter += 1 + timeout_counter = 0 + # if the resource is constrained, update remaining resources if candidate.type_name() in remaining_resources: remaining_resources[candidate.type_name()] -= 1 @@ -115,19 +160,24 @@ class ListScheduler(Scheduler, ABC): current_time + candidate.latency ) + remaining_reads -= candidate.input_count + # schedule the best candidate to the current time sorted_operations.remove(candidate.graph_id) - start_times[candidate.graph_id] = current_time - - schedule.set_schedule_time(current_time) + schedule.start_times[candidate.graph_id] = current_time self._handle_outputs(schedule) + + if not schedule.cyclic: + max_start_time = max(schedule.start_times.values()) + if current_time < max_start_time: + current_time = max_start_time + current_time = max(current_time, schedule.get_max_end_time()) + schedule.set_schedule_time(current_time) + schedule.remove_delays() - # move all inputs ALAP now that operations have moved - for input_op in schedule.sfg.find_by_type_name(Input.type_name()): - input_op = cast(Input, input_op) - schedule.move_operation_alap(input_op.graph_id) + self._handle_inputs(schedule) # move all dont cares ALAP for dc_op in schedule.sfg.find_by_type_name(DontCare.type_name()): @@ -137,9 +187,12 @@ class ListScheduler(Scheduler, ABC): @staticmethod def _candidate_is_schedulable( start_times: dict["GraphID"], + sfg: "SFG", operation: "Operation", current_time: int, remaining_resources: dict["GraphID", int], + remaining_reads: int, + max_concurrent_writes: int, remaining_ops: list["GraphID"], ) -> bool: if ( @@ -148,23 +201,79 @@ class ListScheduler(Scheduler, ABC): ): return False + op_finish_time = current_time + operation.latency + future_ops = [ + sfg.find_by_id(item[0]) + for item in start_times.items() + if item[1] + sfg.find_by_id(item[0]).latency == op_finish_time + ] + + future_ops_writes = sum([op.input_count for op in future_ops]) + + if ( + not operation.graph_id.startswith("out") + and future_ops_writes >= max_concurrent_writes + ): + return False + + read_counter = 0 earliest_start_time = 0 for op_input in operation.inputs: source_op = op_input.signals[0].source.operation + if isinstance(source_op, Delay): + continue + source_op_graph_id = source_op.graph_id if source_op_graph_id in remaining_ops: return False + if start_times[source_op_graph_id] != current_time - 1: + # not a direct connection -> memory read required + read_counter += 1 + + if read_counter > remaining_reads: + return False + proceeding_op_start_time = start_times.get(source_op_graph_id) + proceeding_op_finish_time = proceeding_op_start_time + source_op.latency + + # if not proceeding_op_finish_time == current_time: + # # not direct connection -> memory required, check if okay + # satisfying_remaining_reads = remaining_reads >= operation.input_count + # satisfying_remaining_writes = remaining_writes >= operation.output_count + # if not (satisfying_remaining_reads and satisfying_remaining_writes): + # return False - if not isinstance(source_op, Delay): - earliest_start_time = max( - earliest_start_time, proceeding_op_start_time + source_op.latency - ) + earliest_start_time = max(earliest_start_time, proceeding_op_finish_time) return earliest_start_time <= current_time @abstractmethod def _get_sorted_operations(schedule: "Schedule") -> list["GraphID"]: raise NotImplementedError + + def _handle_inputs(self, schedule: "Schedule") -> None: + for input_op in schedule.sfg.find_by_type_name(Input.type_name()): + input_op = cast(Input, input_op) + if input_op.graph_id not in self._input_times: + schedule.move_operation_alap(input_op.graph_id) + + def _handle_outputs( + self, schedule: "Schedule", non_schedulable_ops: Optional[list["GraphID"]] = [] + ) -> None: + super()._handle_outputs(schedule, non_schedulable_ops) + + schedule.set_schedule_time(schedule.get_max_end_time()) + + for output in schedule.sfg.find_by_type_name(Output.type_name()): + output = cast(Output, output) + if output.graph_id in self._output_delta_times: + delta_time = self._output_delta_times[output.graph_id] + if schedule.cyclic: + schedule.start_times[output.graph_id] = schedule.schedule_time + schedule.move_operation(output.graph_id, delta_time) + else: + schedule.start_times[output.graph_id] = ( + schedule.schedule_time + delta_time + ) diff --git a/b_asic/sfg_generators.py b/b_asic/sfg_generators.py index 3e76f159..2d68215c 100644 --- a/b_asic/sfg_generators.py +++ b/b_asic/sfg_generators.py @@ -415,7 +415,7 @@ def radix_2_dif_fft(points: int) -> SFG: inputs = [] for i in range(points): - inputs.append(Input(name=f"Input: {i}")) + inputs.append(Input()) ports = inputs number_of_stages = int(np.log2(points)) @@ -430,7 +430,7 @@ def radix_2_dif_fft(points: int) -> SFG: ports = _get_bit_reversed_ports(ports) outputs = [] for i, port in enumerate(ports): - outputs.append(Output(port, name=f"Output: {i}")) + outputs.append(Output(port)) return SFG(inputs=inputs, outputs=outputs) diff --git a/examples/auto_scheduling_with_custom_io_times.py b/examples/auto_scheduling_with_custom_io_times.py new file mode 100644 index 00000000..8913bfd8 --- /dev/null +++ b/examples/auto_scheduling_with_custom_io_times.py @@ -0,0 +1,85 @@ +""" +========================================= +Auto Scheduling With Custom IO times +========================================= + +""" + +from b_asic.core_operations import Butterfly, ConstantMultiplication +from b_asic.core_schedulers import ASAPScheduler, HybridScheduler +from b_asic.schedule import Schedule +from b_asic.sfg_generators import radix_2_dif_fft + +sfg = radix_2_dif_fft(points=8) + +# %% +# The SFG is +sfg + +# %% +# Set latencies and execution times. +sfg.set_latency_of_type(Butterfly.type_name(), 3) +sfg.set_latency_of_type(ConstantMultiplication.type_name(), 2) +sfg.set_execution_time_of_type(Butterfly.type_name(), 1) +sfg.set_execution_time_of_type(ConstantMultiplication.type_name(), 1) + +# %% +# Generate an ASAP schedule for reference +schedule = Schedule(sfg, scheduler=ASAPScheduler()) +schedule.show() + +# %% +# Generate a non-cyclic Schedule from HybridScheduler with custom IO times. +resources = {Butterfly.type_name(): 1, ConstantMultiplication.type_name(): 1} +input_times = { + "in0": 0, + "in1": 1, + "in2": 2, + "in3": 3, + "in4": 4, + "in5": 5, + "in6": 6, + "in7": 7, +} +output_delta_times = { + "out0": -2, + "out1": -1, + "out2": 0, + "out3": 1, + "out4": 2, + "out5": 3, + "out6": 4, + "out7": 5, +} +schedule = Schedule( + sfg, + scheduler=HybridScheduler( + resources, + input_times=input_times, + output_delta_times=output_delta_times, + ), +) +schedule.show() + +# %% +# Generate a new Schedule with cyclic scheduling enabled +output_delta_times = { + "out0": 0, + "out1": 1, + "out2": 2, + "out3": 3, + "out4": 4, + "out5": 5, + "out6": 6, + "out7": 7, +} +schedule = Schedule( + sfg, + scheduler=HybridScheduler( + resources, + input_times=input_times, + output_delta_times=output_delta_times, + ), + cyclic=True, +) +schedule.show() diff --git a/examples/ldlt_matrix_inverse.py b/examples/ldlt_matrix_inverse.py index 845bd14b..cf5961aa 100644 --- a/examples/ldlt_matrix_inverse.py +++ b/examples/ldlt_matrix_inverse.py @@ -64,8 +64,31 @@ print("Scheduling time:", schedule.schedule_time) schedule.show() # %% -# Create a HybridScheduler schedule that satisfies the resource constraints. -schedule = Schedule(sfg, scheduler=HybridScheduler(resources)) +# Create a HybridScheduler schedule that satisfies the resource constraints with custom IO times. +# This is the schedule we will synthesize an architecture for. +input_times = { + "in0": 0, + "in1": 1, + "in2": 2, + "in3": 3, + "in4": 4, + "in5": 5, +} +output_delta_times = { + "out0": 0, + "out1": 1, + "out2": 2, + "out3": 3, + "out4": 4, + "out5": 5, +} +schedule = Schedule( + sfg, + scheduler=HybridScheduler( + resources, input_times=input_times, output_delta_times=output_delta_times + ), + cyclic=True, +) print("Scheduling time:", schedule.schedule_time) schedule.show() @@ -116,4 +139,3 @@ arch = Architecture( # %% arch -# schedule.edit() diff --git a/examples/memory_constrained_scheduling.py b/examples/memory_constrained_scheduling.py new file mode 100644 index 00000000..6c0ea9a8 --- /dev/null +++ b/examples/memory_constrained_scheduling.py @@ -0,0 +1,134 @@ +""" +========================================= +Memory Constrained Scheduling +========================================= + +""" + +from b_asic.architecture import Architecture, Memory, ProcessingElement +from b_asic.core_operations import Butterfly, ConstantMultiplication +from b_asic.core_schedulers import ASAPScheduler, HybridScheduler +from b_asic.schedule import Schedule +from b_asic.sfg_generators import radix_2_dif_fft +from b_asic.special_operations import Input, Output + +sfg = radix_2_dif_fft(points=16) + +# %% +# The SFG is +sfg + +# %% +# Set latencies and execution times. +sfg.set_latency_of_type(Butterfly.type_name(), 3) +sfg.set_latency_of_type(ConstantMultiplication.type_name(), 2) +sfg.set_execution_time_of_type(Butterfly.type_name(), 1) +sfg.set_execution_time_of_type(ConstantMultiplication.type_name(), 1) + +# # %% +# Generate an ASAP schedule for reference +schedule = Schedule(sfg, scheduler=ASAPScheduler()) +schedule.show() + +# %% +# Generate a PE constrained HybridSchedule +resources = {Butterfly.type_name(): 1, ConstantMultiplication.type_name(): 1} +schedule = Schedule(sfg, scheduler=HybridScheduler(resources)) +schedule.show() + +# %% +operations = schedule.get_operations() +bfs = operations.get_by_type_name(Butterfly.type_name()) +bfs.show(title="Butterfly executions") +const_muls = operations.get_by_type_name(ConstantMultiplication.type_name()) +const_muls.show(title="ConstMul executions") +inputs = operations.get_by_type_name(Input.type_name()) +inputs.show(title="Input executions") +outputs = operations.get_by_type_name(Output.type_name()) +outputs.show(title="Output executions") + +bf_pe = ProcessingElement(bfs, entity_name="bf") +mul_pe = ProcessingElement(const_muls, entity_name="mul") + +pe_in = ProcessingElement(inputs, entity_name='input') +pe_out = ProcessingElement(outputs, entity_name='output') + +mem_vars = schedule.get_memory_variables() +mem_vars.show(title="All memory variables") +direct, mem_vars = mem_vars.split_on_length() +mem_vars.show(title="Non-zero time memory variables") +mem_vars_set = mem_vars.split_on_ports(read_ports=1, write_ports=1, total_ports=2) + +# %% +memories = [] +for i, mem in enumerate(mem_vars_set): + memory = Memory(mem, memory_type="RAM", entity_name=f"memory{i}") + memories.append(memory) + mem.show(title=f"{memory.entity_name}") + memory.assign("left_edge") + memory.show_content(title=f"Assigned {memory.entity_name}") + +direct.show(title="Direct interconnects") + +# %% +arch = Architecture( + {bf_pe, mul_pe, pe_in, pe_out}, + memories, + direct_interconnects=direct, +) +arch + +# %% +# Generate another HybridSchedule but this time constrain the amount of reads and writes to reduce the amount of memories +resources = {Butterfly.type_name(): 1, ConstantMultiplication.type_name(): 1} +schedule = Schedule( + sfg, + scheduler=HybridScheduler( + resources, max_concurrent_reads=2, max_concurrent_writes=2 + ), +) +schedule.show() + +# %% Print the max number of read and write port accesses to non-direct memories +direct, mem_vars = schedule.get_memory_variables().split_on_length() +print("Max read ports:", mem_vars.read_ports_bound()) +print("Max write ports:", mem_vars.write_ports_bound()) + +# %% Proceed to construct PEs and plot executions and non-direct memory variables +operations = schedule.get_operations() +bfs = operations.get_by_type_name(Butterfly.type_name()) +bfs.show(title="Butterfly executions") +const_muls = operations.get_by_type_name(ConstantMultiplication.type_name()) +const_muls.show(title="ConstMul executions") +inputs = operations.get_by_type_name(Input.type_name()) +inputs.show(title="Input executions") +outputs = operations.get_by_type_name(Output.type_name()) +outputs.show(title="Output executions") + +bf_pe = ProcessingElement(bfs, entity_name="bf") +mul_pe = ProcessingElement(const_muls, entity_name="mul") + +pe_in = ProcessingElement(inputs, entity_name='input') +pe_out = ProcessingElement(outputs, entity_name='output') + +mem_vars.show(title="Non-zero time memory variables") +mem_vars_set = mem_vars.split_on_ports(read_ports=1, write_ports=1, total_ports=2) + +# %% Allocate memories by graph-coloring +memories = [] +for i, mem in enumerate(mem_vars_set): + memory = Memory(mem, memory_type="RAM", entity_name=f"memory{i}") + memories.append(memory) + mem.show(title=f"{memory.entity_name}") + memory.assign("left_edge") + memory.show_content(title=f"Assigned {memory.entity_name}") + +direct.show(title="Direct interconnects") + +# %% Synthesize the new architecture, now only using two memories but with data rate +arch = Architecture( + {bf_pe, mul_pe, pe_in, pe_out}, + memories, + direct_interconnects=direct, +) +arch diff --git a/test/test_core_schedulers.py b/test/test_core_schedulers.py index f934f157..8404db9c 100644 --- a/test/test_core_schedulers.py +++ b/test/test_core_schedulers.py @@ -1,13 +1,26 @@ import pytest -from b_asic.core_operations import Addition, Butterfly, ConstantMultiplication +from b_asic.core_operations import ( + MADS, + Addition, + Butterfly, + ConstantMultiplication, + Reciprocal, +) from b_asic.core_schedulers import ( ALAPScheduler, ASAPScheduler, EarliestDeadlineScheduler, + HybridScheduler, + LeastSlackTimeScheduler, + MaxFanOutScheduler, ) from b_asic.schedule import Schedule -from b_asic.sfg_generators import direct_form_1_iir, radix_2_dif_fft +from b_asic.sfg_generators import ( + direct_form_1_iir, + ldlt_matrix_inverse, + radix_2_dif_fft, +) class TestASAPScheduler: @@ -484,3 +497,628 @@ class TestEarliestDeadlineScheduler: "out3": 7, } assert schedule.schedule_time == 7 + + +class TestLeastSlackTimeScheduler: + def test_empty_sfg(self, sfg_empty): + with pytest.raises( + ValueError, match="Empty signal flow graph cannot be scheduled." + ): + Schedule(sfg_empty, scheduler=LeastSlackTimeScheduler()) + + def test_direct_form_1_iir(self): + sfg = direct_form_1_iir([1, 2, 3], [1, 2, 3]) + + sfg.set_latency_of_type(ConstantMultiplication.type_name(), 2) + sfg.set_execution_time_of_type(ConstantMultiplication.type_name(), 1) + sfg.set_latency_of_type(Addition.type_name(), 3) + sfg.set_execution_time_of_type(Addition.type_name(), 1) + + resources = {Addition.type_name(): 1, ConstantMultiplication.type_name(): 1} + schedule = Schedule( + sfg, scheduler=LeastSlackTimeScheduler(max_resources=resources) + ) + + assert schedule.start_times == { + "cmul4": 0, + "cmul3": 1, + "in0": 2, + "cmul0": 2, + "add1": 3, + "cmul1": 3, + "cmul2": 4, + "add3": 6, + "add0": 7, + "add2": 10, + "out0": 13, + } + assert schedule.schedule_time == 13 + + def test_direct_form_2_iir_inf_resources_no_exec_time( + self, sfg_direct_form_iir_lp_filter + ): + sfg_direct_form_iir_lp_filter.set_latency_of_type(Addition.type_name(), 5) + sfg_direct_form_iir_lp_filter.set_latency_of_type( + ConstantMultiplication.type_name(), 4 + ) + + schedule = Schedule( + sfg_direct_form_iir_lp_filter, scheduler=LeastSlackTimeScheduler() + ) + + # should be the same as for ASAP due to infinite resources, except for input + assert schedule.start_times == { + "in0": 9, + "cmul1": 0, + "cmul4": 0, + "cmul2": 0, + "cmul3": 0, + "add3": 4, + "add1": 4, + "add0": 9, + "cmul0": 14, + "add2": 18, + "out0": 23, + } + assert schedule.schedule_time == 23 + + def test_direct_form_2_iir_1_add_1_mul_no_exec_time( + self, sfg_direct_form_iir_lp_filter + ): + sfg_direct_form_iir_lp_filter.set_latency_of_type(Addition.type_name(), 5) + sfg_direct_form_iir_lp_filter.set_latency_of_type( + ConstantMultiplication.type_name(), 4 + ) + + max_resources = {ConstantMultiplication.type_name(): 1, Addition.type_name(): 1} + + schedule = Schedule( + sfg_direct_form_iir_lp_filter, + scheduler=LeastSlackTimeScheduler(max_resources), + ) + assert schedule.start_times == { + "cmul4": 0, + "cmul3": 4, + "cmul1": 8, + "add1": 8, + "cmul2": 12, + "in0": 13, + "add0": 13, + "add3": 18, + "cmul0": 18, + "add2": 23, + "out0": 28, + } + + assert schedule.schedule_time == 28 + + def test_direct_form_2_iir_1_add_1_mul_exec_time_1( + self, sfg_direct_form_iir_lp_filter + ): + sfg_direct_form_iir_lp_filter.set_latency_of_type( + ConstantMultiplication.type_name(), 3 + ) + sfg_direct_form_iir_lp_filter.set_latency_of_type(Addition.type_name(), 2) + sfg_direct_form_iir_lp_filter.set_execution_time_of_type( + ConstantMultiplication.type_name(), 1 + ) + sfg_direct_form_iir_lp_filter.set_execution_time_of_type( + Addition.type_name(), 1 + ) + + max_resources = {ConstantMultiplication.type_name(): 1, Addition.type_name(): 1} + + schedule = Schedule( + sfg_direct_form_iir_lp_filter, + scheduler=LeastSlackTimeScheduler(max_resources), + ) + assert schedule.start_times == { + "cmul4": 0, + "cmul3": 1, + "cmul1": 2, + "cmul2": 3, + "add1": 4, + "in0": 6, + "add0": 6, + "add3": 7, + "cmul0": 8, + "add2": 11, + "out0": 13, + } + + assert schedule.schedule_time == 13 + + def test_direct_form_2_iir_2_add_3_mul_exec_time_1( + self, sfg_direct_form_iir_lp_filter + ): + sfg_direct_form_iir_lp_filter.set_latency_of_type( + ConstantMultiplication.type_name(), 3 + ) + sfg_direct_form_iir_lp_filter.set_latency_of_type(Addition.type_name(), 2) + sfg_direct_form_iir_lp_filter.set_execution_time_of_type( + ConstantMultiplication.type_name(), 1 + ) + sfg_direct_form_iir_lp_filter.set_execution_time_of_type( + Addition.type_name(), 1 + ) + + max_resources = {ConstantMultiplication.type_name(): 3, Addition.type_name(): 2} + + schedule = Schedule( + sfg_direct_form_iir_lp_filter, + scheduler=LeastSlackTimeScheduler(max_resources), + ) + assert schedule.start_times == { + "cmul1": 0, + "cmul4": 0, + "cmul3": 0, + "cmul2": 1, + "add1": 3, + "add3": 4, + "in0": 5, + "add0": 5, + "cmul0": 7, + "add2": 10, + "out0": 12, + } + + assert schedule.schedule_time == 12 + + def test_radix_2_fft_8_points(self): + sfg = radix_2_dif_fft(points=8) + + sfg.set_latency_of_type(ConstantMultiplication.type_name(), 2) + sfg.set_execution_time_of_type(ConstantMultiplication.type_name(), 1) + sfg.set_latency_of_type(Butterfly.type_name(), 1) + sfg.set_execution_time_of_type(Butterfly.type_name(), 1) + + resources = {Butterfly.type_name(): 2, ConstantMultiplication.type_name(): 2} + schedule = Schedule( + sfg, scheduler=LeastSlackTimeScheduler(max_resources=resources) + ) + + assert schedule.start_times == { + "in1": 0, + "in3": 0, + "in5": 0, + "in7": 0, + "bfly6": 0, + "bfly8": 0, + "in2": 1, + "in6": 1, + "cmul2": 1, + "cmul3": 1, + "bfly11": 1, + "bfly7": 1, + "in0": 2, + "in4": 2, + "cmul0": 2, + "bfly0": 2, + "cmul4": 2, + "bfly5": 3, + "bfly1": 3, + "cmul1": 4, + "bfly2": 4, + "bfly9": 4, + "bfly10": 5, + "bfly3": 5, + "out0": 5, + "out4": 5, + "bfly4": 6, + "out1": 6, + "out2": 6, + "out5": 6, + "out6": 6, + "out7": 7, + "out3": 7, + } + assert schedule.schedule_time == 7 + + +class TestMaxFanOutScheduler: + def test_empty_sfg(self, sfg_empty): + with pytest.raises( + ValueError, match="Empty signal flow graph cannot be scheduled." + ): + Schedule(sfg_empty, scheduler=MaxFanOutScheduler()) + + def test_direct_form_1_iir(self): + sfg = direct_form_1_iir([1, 2, 3], [1, 2, 3]) + + sfg.set_latency_of_type(ConstantMultiplication.type_name(), 2) + sfg.set_execution_time_of_type(ConstantMultiplication.type_name(), 1) + sfg.set_latency_of_type(Addition.type_name(), 3) + sfg.set_execution_time_of_type(Addition.type_name(), 1) + + resources = {Addition.type_name(): 1, ConstantMultiplication.type_name(): 1} + schedule = Schedule(sfg, scheduler=MaxFanOutScheduler(max_resources=resources)) + + assert schedule.start_times == { + "in0": 0, + "cmul0": 0, + "cmul1": 1, + "cmul2": 2, + "cmul4": 3, + "cmul3": 4, + "add3": 4, + "add1": 6, + "add0": 9, + "add2": 12, + "out0": 15, + } + assert schedule.schedule_time == 15 + + +class TestHybridScheduler: + def test_empty_sfg(self, sfg_empty): + with pytest.raises( + ValueError, match="Empty signal flow graph cannot be scheduled." + ): + Schedule(sfg_empty, scheduler=HybridScheduler()) + + def test_direct_form_1_iir(self): + sfg = direct_form_1_iir([1, 2, 3], [1, 2, 3]) + + sfg.set_latency_of_type(ConstantMultiplication.type_name(), 2) + sfg.set_execution_time_of_type(ConstantMultiplication.type_name(), 1) + sfg.set_latency_of_type(Addition.type_name(), 3) + sfg.set_execution_time_of_type(Addition.type_name(), 1) + + resources = {Addition.type_name(): 1, ConstantMultiplication.type_name(): 1} + schedule = Schedule(sfg, scheduler=HybridScheduler(max_resources=resources)) + + assert schedule.start_times == { + "cmul4": 0, + "cmul3": 1, + "in0": 2, + "cmul0": 2, + "add1": 3, + "cmul1": 3, + "cmul2": 4, + "add3": 6, + "add0": 7, + "add2": 10, + "out0": 13, + } + assert schedule.schedule_time == 13 + + def test_radix_2_fft_8_points(self): + sfg = radix_2_dif_fft(points=8) + + sfg.set_latency_of_type(ConstantMultiplication.type_name(), 2) + sfg.set_execution_time_of_type(ConstantMultiplication.type_name(), 1) + sfg.set_latency_of_type(Butterfly.type_name(), 1) + sfg.set_execution_time_of_type(Butterfly.type_name(), 1) + + resources = {Butterfly.type_name(): 2, ConstantMultiplication.type_name(): 2} + schedule = Schedule(sfg, scheduler=HybridScheduler(max_resources=resources)) + + assert schedule.start_times == { + "in1": 0, + "in3": 0, + "in5": 0, + "in7": 0, + "bfly6": 0, + "bfly8": 0, + "in2": 1, + "in6": 1, + "cmul2": 1, + "cmul3": 1, + "bfly11": 1, + "bfly7": 1, + "in0": 2, + "in4": 2, + "cmul0": 2, + "bfly0": 2, + "cmul4": 2, + "bfly5": 3, + "bfly1": 3, + "cmul1": 4, + "bfly2": 4, + "bfly9": 4, + "bfly10": 5, + "bfly3": 5, + "out0": 5, + "out4": 5, + "bfly4": 6, + "out1": 6, + "out2": 6, + "out5": 6, + "out6": 6, + "out7": 7, + "out3": 7, + } + assert schedule.schedule_time == 7 + + def test_radix_2_fft_8_points_specified_IO_times_cyclic(self): + sfg = radix_2_dif_fft(points=8) + + sfg.set_latency_of_type(Butterfly.type_name(), 3) + sfg.set_latency_of_type(ConstantMultiplication.type_name(), 2) + sfg.set_execution_time_of_type(Butterfly.type_name(), 1) + sfg.set_execution_time_of_type(ConstantMultiplication.type_name(), 1) + + resources = {Butterfly.type_name(): 1, ConstantMultiplication.type_name(): 1} + input_times = { + "in0": 0, + "in1": 1, + "in2": 2, + "in3": 3, + "in4": 4, + "in5": 5, + "in6": 6, + "in7": 7, + } + output_times = { + "out0": -2, + "out1": -1, + "out2": 0, + "out3": 1, + "out4": 2, + "out5": 3, + "out6": 4, + "out7": 5, + } + schedule = Schedule( + sfg, + scheduler=HybridScheduler( + resources, input_times=input_times, output_delta_times=output_times + ), + cyclic=True, + ) + + assert schedule.start_times == { + "in0": 0, + "in1": 1, + "in2": 2, + "in3": 3, + "in4": 4, + "in5": 5, + "in6": 6, + "in7": 7, + "bfly0": 4, + "bfly8": 5, + "bfly11": 6, + "bfly6": 7, + "cmul2": 8, + "cmul0": 9, + "bfly1": 9, + "cmul3": 10, + "bfly7": 10, + "bfly2": 11, + "bfly5": 12, + "cmul4": 13, + "bfly9": 13, + "bfly10": 15, + "cmul1": 15, + "bfly3": 16, + "bfly4": 17, + "out0": 18, + "out1": 19, + "out2": 20, + "out3": 1, + "out4": 2, + "out5": 3, + "out6": 4, + "out7": 5, + } + assert schedule.schedule_time == 20 + + def test_radix_2_fft_8_points_specified_IO_times_non_cyclic(self): + sfg = radix_2_dif_fft(points=8) + + sfg.set_latency_of_type(Butterfly.type_name(), 3) + sfg.set_latency_of_type(ConstantMultiplication.type_name(), 2) + sfg.set_execution_time_of_type(Butterfly.type_name(), 1) + sfg.set_execution_time_of_type(ConstantMultiplication.type_name(), 1) + + resources = {Butterfly.type_name(): 1, ConstantMultiplication.type_name(): 1} + input_times = { + "in0": 0, + "in1": 1, + "in2": 2, + "in3": 3, + "in4": 4, + "in5": 5, + "in6": 6, + "in7": 7, + } + output_times = { + "out0": -2, + "out1": -1, + "out2": 0, + "out3": 1, + "out4": 2, + "out5": 3, + "out6": 4, + "out7": 5, + } + schedule = Schedule( + sfg, + scheduler=HybridScheduler( + resources, input_times=input_times, output_delta_times=output_times + ), + cyclic=False, + ) + + assert schedule.start_times == { + "in0": 0, + "in1": 1, + "in2": 2, + "in3": 3, + "in4": 4, + "in5": 5, + "in6": 6, + "in7": 7, + "bfly0": 4, + "bfly8": 5, + "bfly11": 6, + "bfly6": 7, + "cmul2": 8, + "cmul0": 9, + "bfly1": 9, + "cmul3": 10, + "bfly7": 10, + "bfly2": 11, + "bfly5": 12, + "cmul4": 13, + "bfly9": 13, + "bfly10": 15, + "cmul1": 15, + "bfly3": 16, + "bfly4": 17, + "out0": 18, + "out1": 19, + "out2": 20, + "out3": 21, + "out4": 22, + "out5": 23, + "out6": 24, + "out7": 25, + } + assert schedule.schedule_time == 25 + + def test_ldlt_inverse_2x2(self): + sfg = ldlt_matrix_inverse(N=2) + + sfg.set_latency_of_type(MADS.type_name(), 3) + sfg.set_latency_of_type(Reciprocal.type_name(), 2) + sfg.set_execution_time_of_type(MADS.type_name(), 1) + sfg.set_execution_time_of_type(Reciprocal.type_name(), 1) + + resources = {MADS.type_name(): 1, Reciprocal.type_name(): 1} + schedule = Schedule( + sfg, + scheduler=HybridScheduler(resources), + ) + + assert schedule.start_times == { + "in0": 0, + "rec0": 0, + "in1": 2, + "dontcare1": 2, + "mads0": 2, + "in2": 5, + "mads3": 5, + "rec1": 8, + "dontcare0": 10, + "mads2": 10, + "mads1": 13, + "out2": 10, + "out1": 13, + "out0": 16, + } + assert schedule.schedule_time == 16 + + def test_ldlt_inverse_2x2_specified_IO_times_cyclic(self): + sfg = ldlt_matrix_inverse(N=2) + + sfg.set_latency_of_type(MADS.type_name(), 3) + sfg.set_latency_of_type(Reciprocal.type_name(), 2) + sfg.set_execution_time_of_type(MADS.type_name(), 1) + sfg.set_execution_time_of_type(Reciprocal.type_name(), 1) + + resources = {MADS.type_name(): 1, Reciprocal.type_name(): 1} + input_times = { + "in0": 0, + "in1": 1, + "in2": 2, + } + output_times = { + "out0": 0, + "out1": 1, + "out2": 2, + } + schedule = Schedule( + sfg, + scheduler=HybridScheduler( + resources, input_times=input_times, output_delta_times=output_times + ), + cyclic=True, + ) + + assert schedule.start_times == { + "in0": 0, + "in1": 1, + "in2": 2, + "rec0": 0, + "dontcare1": 2, + "mads0": 2, + "mads3": 5, + "rec1": 8, + "dontcare0": 10, + "mads2": 10, + "mads1": 13, + "out0": 16, + "out1": 1, + "out2": 2, + } + assert schedule.schedule_time == 16 + + def test_max_invalid_resources(self): + sfg = ldlt_matrix_inverse(N=2) + + sfg.set_latency_of_type(MADS.type_name(), 3) + sfg.set_latency_of_type(Reciprocal.type_name(), 2) + sfg.set_execution_time_of_type(MADS.type_name(), 1) + sfg.set_execution_time_of_type(Reciprocal.type_name(), 1) + + resources = 2 + with pytest.raises(ValueError, match="max_resources must be a dictionary."): + Schedule(sfg, scheduler=HybridScheduler(resources)) + + resources = "test" + with pytest.raises(ValueError, match="max_resources must be a dictionary."): + Schedule(sfg, scheduler=HybridScheduler(resources)) + + resources = [] + with pytest.raises(ValueError, match="max_resources must be a dictionary."): + Schedule(sfg, scheduler=HybridScheduler(resources)) + + resources = {1: 1} + with pytest.raises( + ValueError, match="max_resources key must be a valid type_name." + ): + Schedule(sfg, scheduler=HybridScheduler(resources)) + + resources = {MADS.type_name(): "test"} + with pytest.raises(ValueError, match="max_resources value must be an integer."): + Schedule(sfg, scheduler=HybridScheduler(resources)) + + # def test_ldlt_inverse_2x2_read_constrained(self): + # sfg = ldlt_matrix_inverse(N=2) + + # sfg.set_latency_of_type(MADS.type_name(), 3) + # sfg.set_latency_of_type(Reciprocal.type_name(), 2) + # sfg.set_execution_time_of_type(MADS.type_name(), 1) + # sfg.set_execution_time_of_type(Reciprocal.type_name(), 1) + + # resources = {MADS.type_name(): 1, Reciprocal.type_name(): 1} + # schedule = Schedule( + # sfg, + # scheduler=HybridScheduler( + # max_resources = resources, + # max_concurrent_reads = 3, + # ), + # ) + + def test_ldlt_inverse_2x2_read_constrained_too_low(self): + sfg = ldlt_matrix_inverse(N=2) + + sfg.set_latency_of_type(MADS.type_name(), 3) + sfg.set_latency_of_type(Reciprocal.type_name(), 2) + sfg.set_execution_time_of_type(MADS.type_name(), 1) + sfg.set_execution_time_of_type(Reciprocal.type_name(), 1) + + resources = {MADS.type_name(): 1, Reciprocal.type_name(): 1} + with pytest.raises( + TimeoutError, + match="Algorithm did not schedule any operation for 10 time steps, try relaxing constraints.", + ): + Schedule( + sfg, + scheduler=HybridScheduler( + max_resources=resources, + max_concurrent_reads=2, + ), + ) -- GitLab