diff --git a/b_asic/_preferences.py b/b_asic/_preferences.py index d3449aefb2cb5734e27f4f813264b1ccdbd1011b..004e498ccf997bec68298ef88ac73e7ee19d962a 100644 --- a/b_asic/_preferences.py +++ b/b_asic/_preferences.py @@ -11,6 +11,7 @@ SCHEDULE_OFFSET: float = 0.2 SPLINE_OFFSET: float = 0.2 +# Colors for architecture Digraph PE_COLOR = (0, 185, 231) # LiuBlue PE_CLUSTER_COLOR = (210, 238, 249) # LiuBlue5 MEMORY_COLOR = (0, 207, 181) # LiuGreen diff --git a/b_asic/architecture.py b/b_asic/architecture.py index b2938fdea1f0d9d793bd526691a3749cf1ec1da3..2edcd3525d483b3e67b356a9498db04734d1fdf6 100644 --- a/b_asic/architecture.py +++ b/b_asic/architecture.py @@ -773,7 +773,7 @@ of :class:`~b_asic.architecture.ProcessingElement` cluster: bool = True, splines: str = "spline", io_cluster: bool = True, - show_multiplexers: bool = True, + multiplexers: bool = True, colored: bool = True, ) -> Digraph: """ @@ -786,8 +786,9 @@ of :class:`~b_asic.architecture.ProcessingElement` splines : str, default: "spline" The type of interconnect to use for graph drawing. io_cluster : bool, default: True - Whether Inputs and Outputs are drawn inside an IO cluster. - show_multiplexers : bool, default: True + Whether Inputs and Outputs are drawn inside an IO cluster. Only relevant + if *cluster* is True. + multiplexers : bool, default: True Whether input multiplexers are included. colored : bool, default: True Whether to color the nodes. @@ -916,7 +917,7 @@ of :class:`~b_asic.architecture.ProcessingElement` destination_edges[destination_str].add(source_str) destination_list = {k: list(v) for k, v in destination_edges.items()} - if show_multiplexers: + if multiplexers: for destination, source_list in destination_list.items(): if len(source_list) > 1: # Create GraphViz struct for multiplexer @@ -940,7 +941,7 @@ of :class:`~b_asic.architecture.ProcessingElement` dg.edge(src_str, branch, arrowhead='none') src_str = branch for destination_str, cnt_str in destination_counts: - if show_multiplexers and len(destination_list[destination_str]) > 1: + if multiplexers and len(destination_list[destination_str]) > 1: idx = destination_list[destination_str].index(original_src_str) destination_str = f"{destination_str.replace(':', '_')}_mux:in{idx}" dg.edge(src_str, destination_str, label=cnt_str) diff --git a/b_asic/process.py b/b_asic/process.py index 5a1cc620a2a9d7916f61c0458befd114fcb32efd..bf0c2ef3c9c3df50919bdf989e8990d5ba4c2c7a 100644 --- a/b_asic/process.py +++ b/b_asic/process.py @@ -168,7 +168,7 @@ class MemoryProcess(Process): Parameters ---------- length : int, default: 0 - The life time length to split on. Length is inclusive for the smaller + The lifetime length to split on. Length is inclusive for the smaller process. Returns diff --git a/b_asic/resources.py b/b_asic/resources.py index d655175c6899fbcf6654dc606e1a71777820590a..51d05523014e98aea023cc04c8266ab95fad56d5 100644 --- a/b_asic/resources.py +++ b/b_asic/resources.py @@ -762,9 +762,10 @@ class ProcessCollection: exclusion_graph = nx.Graph() exclusion_graph.add_nodes_from(self._collection) for node1 in exclusion_graph: - node1_stop_times = tuple( + node1_stop_times = set( read_time % self.schedule_time for read_time in node1.read_times ) + node1_start_time = node1.start_time % self.schedule_time if total_ports == 1 and node1.start_time in node1_stop_times: print(node1.start_time, node1_stop_times) raise ValueError("Cannot read and write in same cycle.") @@ -775,24 +776,18 @@ class ProcessCollection: node2_stop_times = tuple( read_time % self.schedule_time for read_time in node2.read_times ) - for node1_stop_time in node1_stop_times: - for node2_stop_time in node2_stop_times: - if total_ports == 1: - # Single-port assignment - if node1.start_time == node2.start_time: - exclusion_graph.add_edge(node1, node2) - elif node1_stop_time == node2_stop_time: - exclusion_graph.add_edge(node1, node2) - elif node1.start_time == node2_stop_time: - exclusion_graph.add_edge(node1, node2) - elif node1_stop_time == node2.start_time: - exclusion_graph.add_edge(node1, node2) - else: - # Dual-port assignment - if node1.start_time == node2.start_time: - exclusion_graph.add_edge(node1, node2) - elif node1_stop_time == node2_stop_time: - exclusion_graph.add_edge(node1, node2) + node2_start_time = node2.start_time % self.schedule_time + if write_ports == 1 and node1_start_time == node2_start_time: + exclusion_graph.add_edge(node1, node2) + if read_ports == 1 and node1_stop_times.intersection( + node2_stop_times + ): + exclusion_graph.add_edge(node1, node2) + if total_ports == 1 and ( + node1_start_time in node2_stop_times + or node2_start_time in node1_stop_times + ): + exclusion_graph.add_edge(node1, node2) return exclusion_graph def create_exclusion_graph_from_execution_time(self) -> nx.Graph: diff --git a/b_asic/schedule.py b/b_asic/schedule.py index 242a9525e23b19ffa6777bc7043ea185d634032b..253c107460ce2e78e3d0ddc18b7bd09ba797294b 100644 --- a/b_asic/schedule.py +++ b/b_asic/schedule.py @@ -338,10 +338,10 @@ class Schedule: -------- get_max_time """ - if time < self.get_max_end_time(): + max_end_time = self.get_max_end_time() + if time < max_end_time: raise ValueError( - f"New schedule time ({time}) too short, minimum:" - f" {self.get_max_end_time()}." + f"New schedule time ({time}) too short, minimum: {max_end_time}." ) self._schedule_time = time return self @@ -758,7 +758,8 @@ class Schedule: } ret.append( MemoryVariable( - start_time + cast(int, outport.latency_offset), + (start_time + cast(int, outport.latency_offset)) + % self.schedule_time, outport, reads, outport.name, diff --git a/examples/fivepointwinograddft.py b/examples/fivepointwinograddft.py new file mode 100644 index 0000000000000000000000000000000000000000..8093a2e121ea33ba06d71768605c464f8913b789 --- /dev/null +++ b/examples/fivepointwinograddft.py @@ -0,0 +1,211 @@ +""" +======================= +Five-point Winograd DFT +======================= +""" + +from math import cos, pi, sin + +from b_asic.architecture import Architecture, Memory, ProcessingElement +from b_asic.core_operations import AddSub, Butterfly, ConstantMultiplication +from b_asic.schedule import Schedule +from b_asic.signal_flow_graph import SFG +from b_asic.special_operations import Input, Output + +u = -2 * pi / 5 +c50 = (cos(u) + cos(2 * u)) / 2 - 1 +c51 = (cos(u) - cos(2 * u)) / 2 +c52 = 1j * (sin(u) + sin(2 * u)) / 2 +c53 = 1j * (sin(2 * u)) +c54 = 1j * (sin(u) - sin(2 * u)) + + +in0 = Input("x0") +in1 = Input("x1") +in2 = Input("x2") +in3 = Input("x3") +in4 = Input("x4") +bf0 = Butterfly(in1, in3) +bf1 = Butterfly(in4, in2) +bf2 = Butterfly(bf0.output(0), bf1.output(0)) +a0 = AddSub(True, bf0.output(1), bf1.output(0)) +a1 = AddSub(True, bf2.output(0), in0) +# Should overload float*OutputPort as well +m0 = ConstantMultiplication(c50, bf2.output(0)) +m1 = ConstantMultiplication(c51, bf0.output(1)) +m2 = c52 * a0 +m3 = ConstantMultiplication(c53, bf2.output(1)) +m4 = ConstantMultiplication(c54, bf1.output(1)) +a2 = AddSub(True, m0, a1) +a3 = AddSub(False, m3, m2) +a4 = AddSub(True, m3, m4) +bf3 = Butterfly(a2, m1) +bf4 = Butterfly(bf3.output(0), a3) +bf5 = Butterfly(bf3.output(1), a4) + +out0 = Output(a1, "X0") +out1 = Output(bf4.output(0), "X1") +out2 = Output(bf4.output(1), "X2") +out4 = Output(bf5.output(0), "X4") +out3 = Output(bf5.output(1), "X3") + +sfg = SFG( + inputs=[in0, in1, in2, in3, in4], + outputs=[out0, out1, out2, out3, out4], + name="5-point Winograd DFT", +) + +# %% +# The SFG looks like +sfg + +# %% +# Set latencies and execution times +sfg.set_latency_of_type(ConstantMultiplication.type_name(), 2) +sfg.set_latency_of_type(AddSub.type_name(), 1) +sfg.set_latency_of_type(Butterfly.type_name(), 1) +sfg.set_execution_time_of_type(ConstantMultiplication.type_name(), 1) +sfg.set_execution_time_of_type(AddSub.type_name(), 1) +sfg.set_execution_time_of_type(Butterfly.type_name(), 1) + +# %% +# Generate schedule +schedule = Schedule(sfg, cyclic=True) +schedule.show() + +# Reschedule to only use one AddSub and one multiplier + +schedule.move_operation('out2', 4) +schedule.move_operation('out3', 4) +schedule.move_operation('out4', 3) +schedule.move_operation('out5', 6) +schedule.set_schedule_time(15) +schedule.move_operation('out5', 3) +schedule.move_operation('out4', 5) +schedule.move_operation('out3', 3) +schedule.move_operation('out2', 2) +schedule.move_operation('out1', 2) +schedule.move_operation('bfly4', 16) +schedule.move_operation('bfly3', 14) +schedule.move_operation('bfly2', 14) +schedule.move_operation('addsub3', 17) +schedule.move_operation('addsub5', 15) +schedule.move_operation('addsub2', 14) +schedule.move_operation('cmul5', 15) +schedule.move_operation('cmul3', 15) +schedule.move_operation('cmul1', 14) +schedule.move_operation('addsub1', 2) +schedule.move_operation('cmul2', 16) +schedule.move_operation('addsub4', 15) +schedule.move_operation('out1', 15) +schedule.move_operation('addsub1', 13) +schedule.move_operation('cmul4', 18) +schedule.move_operation('bfly1', 14) +schedule.move_operation('bfly6', 14) +schedule.move_operation('bfly5', 14) +schedule.move_operation('in5', 1) +schedule.move_operation('in3', 2) +schedule.move_operation('in2', 3) +schedule.move_operation('in4', 4) +schedule.move_operation('bfly6', -5) +schedule.move_operation('bfly5', -6) +schedule.move_operation('addsub1', -1) +schedule.move_operation('bfly1', -1) +schedule.move_operation('bfly1', -4) +schedule.move_operation('addsub1', -5) +schedule.move_operation('addsub4', -6) +schedule.move_operation('cmul4', -10) +schedule.move_operation('cmul2', -7) +schedule.move_operation('cmul1', -2) +schedule.move_operation('cmul3', -6) +schedule.move_operation('cmul5', -5) +schedule.move_operation('cmul1', -3) +schedule.move_operation('cmul5', -1) +schedule.set_schedule_time(13) +schedule.move_operation('bfly5', -6) +schedule.move_operation('bfly6', -1) +schedule.move_operation('cmul4', -6) +schedule.move_operation('addsub1', 4) +schedule.move_operation('cmul3', 4) +schedule.move_operation('cmul1', 3) +schedule.move_operation('bfly1', 3) +schedule.move_operation('cmul2', 5) +schedule.move_operation('cmul5', 4) +schedule.move_operation('addsub4', 4) +schedule.set_schedule_time(10) +schedule.move_operation('addsub1', -1) +schedule.move_operation('cmul4', 1) +schedule.move_operation('addsub4', -1) +schedule.move_operation('cmul5', -1) +schedule.move_operation('cmul2', -2) +schedule.move_operation('bfly6', -4) +schedule.move_operation('bfly1', -1) +schedule.move_operation('addsub1', -1) +schedule.move_operation('cmul1', -1) +schedule.move_operation('cmul2', -3) +schedule.move_operation('addsub2', -1) +schedule.move_operation('bfly2', -1) +schedule.move_operation('bfly1', -1) +schedule.move_operation('cmul1', -1) +schedule.move_operation('addsub2', -1) +schedule.move_operation('addsub4', -1) +schedule.move_operation('addsub4', -3) +schedule.move_operation('cmul4', -1) +schedule.move_operation('bfly1', -2) +schedule.move_operation('cmul2', -1) +schedule.move_operation('cmul1', -2) +schedule.move_operation('cmul5', -4) +schedule.move_operation('cmul1', 1) +schedule.move_operation('cmul3', -5) +schedule.move_operation('cmul5', 2) +schedule.move_operation('addsub3', -3) +schedule.move_operation('addsub1', -3) +schedule.move_operation('addsub2', -1) +schedule.move_operation('addsub3', -4) +schedule.move_operation('bfly2', -2) +schedule.move_operation('addsub5', -3) +schedule.move_operation('bfly3', -2) +schedule.show() + +# Extract memory variables and operation executions +operations = schedule.get_operations() +adders = operations.get_by_type_name(AddSub.type_name()) +adders.show(title="AddSub executions") +mults = operations.get_by_type_name('cmul') +mults.show(title="Multiplier executions") +butterflies = operations.get_by_type_name(Butterfly.type_name()) +butterflies.show(title="Butterfly executions") +inputs = operations.get_by_type_name('in') +inputs.show(title="Input executions") +outputs = operations.get_by_type_name('out') +outputs.show(title="Output executions") + +addsub = ProcessingElement(adders, entity_name="addsub") +butterfly = ProcessingElement(butterflies, entity_name="butterfly") +multiplier = ProcessingElement(mults, entity_name="multiplier") +pe_in = ProcessingElement(inputs, entity_name='input') +pe_out = ProcessingElement(outputs, entity_name='output') + +mem_vars = schedule.get_memory_variables() +mem_vars.show(title="All memory variables") +direct, mem_vars = mem_vars.split_on_length() +mem_vars.show(title="Non-zero time memory variables") +mem_vars_set = mem_vars.split_on_ports(read_ports=1, write_ports=1, total_ports=2) + +memories = [] +for i, mem in enumerate(mem_vars_set): + memory = Memory(mem, memory_type="RAM", entity_name=f"memory{i}") + memories.append(memory) + mem.show(title=f"{memory.entity_name}") + memory.assign("left_edge") + memory.show_content(title=f"Assigned {memory.entity_name}") + +direct.show(title="Direct interconnects") + +arch = Architecture( + {addsub, butterfly, multiplier, pe_in, pe_out}, + memories, + direct_interconnects=direct, +) + +arch diff --git a/examples/threepointwinograddft.py b/examples/threepointwinograddft.py index 68b8afbd3997c4b05476fbedea19e8ca9b0ac6c5..f817b31aff51023e7a742714170b00d6acab4b9a 100644 --- a/examples/threepointwinograddft.py +++ b/examples/threepointwinograddft.py @@ -6,7 +6,8 @@ Three-point Winograd DFT from math import cos, pi, sin -from b_asic.core_operations import Addition, ConstantMultiplication, Subtraction +from b_asic.architecture import Architecture, Memory, ProcessingElement +from b_asic.core_operations import AddSub, ConstantMultiplication from b_asic.schedule import Schedule from b_asic.signal_flow_graph import SFG from b_asic.special_operations import Input, Output @@ -19,14 +20,14 @@ c31 = sin(u) in0 = Input("x0") in1 = Input("x1") in2 = Input("x2") -a0 = in1 + in2 -a1 = in1 - in2 -a2 = a0 + in0 +a0 = AddSub(True, in1, in2) +a1 = AddSub(False, in1, in2) +a2 = AddSub(True, a0, in0) m0 = c30 * a0 m1 = c31 * a1 -a3 = a2 + m0 -a4 = a3 + m1 -a5 = a3 - m1 +a3 = AddSub(True, a2, m0) +a4 = AddSub(True, a3, m1) +a5 = AddSub(False, a3, m1) out0 = Output(a2, "X0") out1 = Output(a4, "X1") out2 = Output(a5, "X2") @@ -44,15 +45,87 @@ sfg # %% # Set latencies and execution times sfg.set_latency_of_type(ConstantMultiplication.type_name(), 2) -sfg.set_latency_of_type(Addition.type_name(), 1) -sfg.set_latency_of_type(Subtraction.type_name(), 1) +sfg.set_latency_of_type(AddSub.type_name(), 1) sfg.set_execution_time_of_type(ConstantMultiplication.type_name(), 1) -sfg.set_execution_time_of_type(Addition.type_name(), 1) -sfg.set_execution_time_of_type(Subtraction.type_name(), 1) +sfg.set_execution_time_of_type(AddSub.type_name(), 1) # %% # Generate schedule schedule = Schedule(sfg, cyclic=True) schedule.show() -pc = schedule.get_memory_variables() +# Reschedule to only use one AddSub and one multiplier +schedule.set_schedule_time(10) +schedule.move_operation('out2', 3) +schedule.move_operation('out3', 4) +schedule.move_operation('addsub5', 2) +schedule.move_operation('addsub4', 3) +schedule.move_operation('addsub3', 2) +schedule.move_operation('cmul2', 2) +schedule.move_operation('cmul1', 2) +schedule.move_operation('out1', 5) +schedule.move_operation('addsub1', 3) +schedule.move_operation('addsub6', 2) +schedule.move_operation('addsub2', 2) +schedule.move_operation('in2', 1) +schedule.move_operation('in3', 2) +schedule.move_operation('cmul2', 1) +schedule.move_operation('out3', 6) +schedule.move_operation('out2', 6) +schedule.move_operation('out1', 6) +schedule.move_operation('addsub6', 1) +schedule.move_operation('addsub4', 3) +schedule.move_operation('addsub5', 4) +schedule.move_operation('addsub4', 1) +schedule.move_operation('addsub5', 4) +schedule.move_operation('cmul2', 3) +schedule.move_operation('addsub4', 2) +schedule.move_operation('cmul2', 3) +schedule.move_operation('addsub3', 5) +schedule.set_schedule_time(6) +schedule.move_operation('addsub1', 1) +schedule.move_operation('addsub4', -1) +schedule.move_operation('cmul2', -2) +schedule.move_operation('addsub4', -1) +schedule.move_operation('addsub1', -1) +schedule.move_operation('addsub3', -1) +schedule.move_operation('addsub5', -4) +schedule.show() + +# Extract memory variables and operation executions +operations = schedule.get_operations() +adders = operations.get_by_type_name(AddSub.type_name()) +adders.show(title="AddSub executions") +mults = operations.get_by_type_name('cmul') +mults.show(title="Multiplier executions") +inputs = operations.get_by_type_name('in') +inputs.show(title="Input executions") +outputs = operations.get_by_type_name('out') +outputs.show(title="Output executions") + +addsub = ProcessingElement(adders, entity_name="addsub") +multiplier = ProcessingElement(mults, entity_name="multiplier") +pe_in = ProcessingElement(inputs, entity_name='input') +pe_out = ProcessingElement(outputs, entity_name='output') + +mem_vars = schedule.get_memory_variables() +mem_vars.show(title="All memory variables") +direct, mem_vars = mem_vars.split_on_length() +mem_vars.show(title="Non-zero time memory variables") +mem_vars_set = mem_vars.split_on_ports(read_ports=1, write_ports=1, total_ports=2) + +memories = [] +for i, mem in enumerate(mem_vars_set): + memory = Memory(mem, memory_type="RAM", entity_name=f"memory{i}") + memories.append(memory) + mem.show(title=f"{memory.entity_name}") + memory.assign("left_edge") + memory.show_content(title=f"Assigned {memory.entity_name}") + +direct.show(title="Direct interconnects") + +arch = Architecture( + {addsub, multiplier, pe_in, pe_out}, memories, direct_interconnects=direct +) + +arch