diff --git a/b_asic/_preferences.py b/b_asic/_preferences.py
index d3449aefb2cb5734e27f4f813264b1ccdbd1011b..004e498ccf997bec68298ef88ac73e7ee19d962a 100644
--- a/b_asic/_preferences.py
+++ b/b_asic/_preferences.py
@@ -11,6 +11,7 @@ SCHEDULE_OFFSET: float = 0.2
 
 SPLINE_OFFSET: float = 0.2
 
+# Colors for architecture Digraph
 PE_COLOR = (0, 185, 231)  # LiuBlue
 PE_CLUSTER_COLOR = (210, 238, 249)  # LiuBlue5
 MEMORY_COLOR = (0, 207, 181)  # LiuGreen
diff --git a/b_asic/architecture.py b/b_asic/architecture.py
index b2938fdea1f0d9d793bd526691a3749cf1ec1da3..2edcd3525d483b3e67b356a9498db04734d1fdf6 100644
--- a/b_asic/architecture.py
+++ b/b_asic/architecture.py
@@ -773,7 +773,7 @@ of :class:`~b_asic.architecture.ProcessingElement`
         cluster: bool = True,
         splines: str = "spline",
         io_cluster: bool = True,
-        show_multiplexers: bool = True,
+        multiplexers: bool = True,
         colored: bool = True,
     ) -> Digraph:
         """
@@ -786,8 +786,9 @@ of :class:`~b_asic.architecture.ProcessingElement`
         splines : str, default: "spline"
             The type of interconnect to use for graph drawing.
         io_cluster : bool, default: True
-            Whether Inputs and Outputs are drawn inside an IO cluster.
-        show_multiplexers : bool, default: True
+            Whether Inputs and Outputs are drawn inside an IO cluster. Only relevant
+            if *cluster* is True.
+        multiplexers : bool, default: True
             Whether input multiplexers are included.
         colored : bool, default: True
             Whether to color the nodes.
@@ -916,7 +917,7 @@ of :class:`~b_asic.architecture.ProcessingElement`
                     destination_edges[destination_str].add(source_str)
 
         destination_list = {k: list(v) for k, v in destination_edges.items()}
-        if show_multiplexers:
+        if multiplexers:
             for destination, source_list in destination_list.items():
                 if len(source_list) > 1:
                     # Create GraphViz struct for multiplexer
@@ -940,7 +941,7 @@ of :class:`~b_asic.architecture.ProcessingElement`
                 dg.edge(src_str, branch, arrowhead='none')
                 src_str = branch
             for destination_str, cnt_str in destination_counts:
-                if show_multiplexers and len(destination_list[destination_str]) > 1:
+                if multiplexers and len(destination_list[destination_str]) > 1:
                     idx = destination_list[destination_str].index(original_src_str)
                     destination_str = f"{destination_str.replace(':', '_')}_mux:in{idx}"
                 dg.edge(src_str, destination_str, label=cnt_str)
diff --git a/b_asic/process.py b/b_asic/process.py
index 5a1cc620a2a9d7916f61c0458befd114fcb32efd..bf0c2ef3c9c3df50919bdf989e8990d5ba4c2c7a 100644
--- a/b_asic/process.py
+++ b/b_asic/process.py
@@ -168,7 +168,7 @@ class MemoryProcess(Process):
         Parameters
         ----------
         length : int, default: 0
-            The life time length to split on. Length is inclusive for the smaller
+            The lifetime length to split on. Length is inclusive for the smaller
             process.
 
         Returns
diff --git a/b_asic/resources.py b/b_asic/resources.py
index d655175c6899fbcf6654dc606e1a71777820590a..51d05523014e98aea023cc04c8266ab95fad56d5 100644
--- a/b_asic/resources.py
+++ b/b_asic/resources.py
@@ -762,9 +762,12 @@ class ProcessCollection:
         exclusion_graph = nx.Graph()
         exclusion_graph.add_nodes_from(self._collection)
         for node1 in exclusion_graph:
-            node1_stop_times = tuple(
+            # Fold read times into a single schedule period.
+            node1_stop_times = set(
                 read_time % self.schedule_time for read_time in node1.read_times
             )
-            if total_ports == 1 and node1.start_time in node1_stop_times:
-                print(node1.start_time, node1_stop_times)
+            # Fold the write (start) time the same way so both are comparable.
+            node1_start_time = node1.start_time % self.schedule_time
+            # With one shared port, a variable cannot be written and read in one cycle.
+            if total_ports == 1 and node1_start_time in node1_stop_times:
                 raise ValueError("Cannot read and write in same cycle.")
@@ -775,24 +778,21 @@ class ProcessCollection:
                     node2_stop_times = tuple(
                         read_time % self.schedule_time for read_time in node2.read_times
                     )
-                    for node1_stop_time in node1_stop_times:
-                        for node2_stop_time in node2_stop_times:
-                            if total_ports == 1:
-                                # Single-port assignment
-                                if node1.start_time == node2.start_time:
-                                    exclusion_graph.add_edge(node1, node2)
-                                elif node1_stop_time == node2_stop_time:
-                                    exclusion_graph.add_edge(node1, node2)
-                                elif node1.start_time == node2_stop_time:
-                                    exclusion_graph.add_edge(node1, node2)
-                                elif node1_stop_time == node2.start_time:
-                                    exclusion_graph.add_edge(node1, node2)
-                            else:
-                                # Dual-port assignment
-                                if node1.start_time == node2.start_time:
-                                    exclusion_graph.add_edge(node1, node2)
-                                elif node1_stop_time == node2_stop_time:
-                                    exclusion_graph.add_edge(node1, node2)
+                    node2_start_time = node2.start_time % self.schedule_time
+                    # Two writes in the same cycle conflict on a single write port.
+                    if write_ports == 1 and node1_start_time == node2_start_time:
+                        exclusion_graph.add_edge(node1, node2)
+                    # Two reads in the same cycle conflict on a single read port.
+                    if read_ports == 1 and node1_stop_times.intersection(
+                        node2_stop_times
+                    ):
+                        exclusion_graph.add_edge(node1, node2)
+                    # One shared port: a write of one node clashes with a read of the other.
+                    if total_ports == 1 and (
+                        node1_start_time in node2_stop_times
+                        or node2_start_time in node1_stop_times
+                    ):
+                        exclusion_graph.add_edge(node1, node2)
         return exclusion_graph
 
     def create_exclusion_graph_from_execution_time(self) -> nx.Graph:
diff --git a/b_asic/schedule.py b/b_asic/schedule.py
index 242a9525e23b19ffa6777bc7043ea185d634032b..253c107460ce2e78e3d0ddc18b7bd09ba797294b 100644
--- a/b_asic/schedule.py
+++ b/b_asic/schedule.py
@@ -338,10 +338,10 @@ class Schedule:
         --------
         get_max_time
         """
-        if time < self.get_max_end_time():
+        max_end_time = self.get_max_end_time()
+        if time < max_end_time:
             raise ValueError(
-                f"New schedule time ({time}) too short, minimum:"
-                f" {self.get_max_end_time()}."
+                f"New schedule time ({time}) too short, minimum: {max_end_time}."
             )
         self._schedule_time = time
         return self
@@ -758,7 +758,8 @@ class Schedule:
                 }
                 ret.append(
                     MemoryVariable(
-                        start_time + cast(int, outport.latency_offset),
+                        (start_time + cast(int, outport.latency_offset))
+                        % self.schedule_time,
                         outport,
                         reads,
                         outport.name,
diff --git a/examples/fivepointwinograddft.py b/examples/fivepointwinograddft.py
new file mode 100644
index 0000000000000000000000000000000000000000..8093a2e121ea33ba06d71768605c464f8913b789
--- /dev/null
+++ b/examples/fivepointwinograddft.py
@@ -0,0 +1,211 @@
+"""
+=======================
+Five-point Winograd DFT
+=======================
+"""
+
+from math import cos, pi, sin
+
+from b_asic.architecture import Architecture, Memory, ProcessingElement
+from b_asic.core_operations import AddSub, Butterfly, ConstantMultiplication
+from b_asic.schedule import Schedule
+from b_asic.signal_flow_graph import SFG
+from b_asic.special_operations import Input, Output
+
+u = -2 * pi / 5
+c50 = (cos(u) + cos(2 * u)) / 2 - 1
+c51 = (cos(u) - cos(2 * u)) / 2
+c52 = 1j * (sin(u) + sin(2 * u)) / 2
+c53 = 1j * (sin(2 * u))
+c54 = 1j * (sin(u) - sin(2 * u))
+
+
+in0 = Input("x0")
+in1 = Input("x1")
+in2 = Input("x2")
+in3 = Input("x3")
+in4 = Input("x4")
+bf0 = Butterfly(in1, in3)
+bf1 = Butterfly(in4, in2)
+bf2 = Butterfly(bf0.output(0), bf1.output(0))
+a0 = AddSub(True, bf0.output(1), bf1.output(0))
+a1 = AddSub(True, bf2.output(0), in0)
+# TODO: overload constant * OutputPort too (only constant * Operation works now)
+m0 = ConstantMultiplication(c50, bf2.output(0))
+m1 = ConstantMultiplication(c51, bf0.output(1))
+m2 = c52 * a0
+m3 = ConstantMultiplication(c53, bf2.output(1))
+m4 = ConstantMultiplication(c54, bf1.output(1))
+a2 = AddSub(True, m0, a1)
+a3 = AddSub(False, m3, m2)
+a4 = AddSub(True, m3, m4)
+bf3 = Butterfly(a2, m1)
+bf4 = Butterfly(bf3.output(0), a3)
+bf5 = Butterfly(bf3.output(1), a4)
+
+out0 = Output(a1, "X0")
+out1 = Output(bf4.output(0), "X1")
+out2 = Output(bf4.output(1), "X2")
+out4 = Output(bf5.output(0), "X4")
+out3 = Output(bf5.output(1), "X3")
+
+sfg = SFG(
+    inputs=[in0, in1, in2, in3, in4],
+    outputs=[out0, out1, out2, out3, out4],
+    name="5-point Winograd DFT",
+)
+
+# %%
+# The SFG looks like
+sfg
+
+# %%
+# Set latencies and execution times
+sfg.set_latency_of_type(ConstantMultiplication.type_name(), 2)
+sfg.set_latency_of_type(AddSub.type_name(), 1)
+sfg.set_latency_of_type(Butterfly.type_name(), 1)
+sfg.set_execution_time_of_type(ConstantMultiplication.type_name(), 1)
+sfg.set_execution_time_of_type(AddSub.type_name(), 1)
+sfg.set_execution_time_of_type(Butterfly.type_name(), 1)
+
+# %%
+# Generate schedule
+schedule = Schedule(sfg, cyclic=True)
+schedule.show()
+
+# Reschedule to only use one AddSub, one Butterfly, and one multiplier
+
+schedule.move_operation('out2', 4)
+schedule.move_operation('out3', 4)
+schedule.move_operation('out4', 3)
+schedule.move_operation('out5', 6)
+schedule.set_schedule_time(15)
+schedule.move_operation('out5', 3)
+schedule.move_operation('out4', 5)
+schedule.move_operation('out3', 3)
+schedule.move_operation('out2', 2)
+schedule.move_operation('out1', 2)
+schedule.move_operation('bfly4', 16)
+schedule.move_operation('bfly3', 14)
+schedule.move_operation('bfly2', 14)
+schedule.move_operation('addsub3', 17)
+schedule.move_operation('addsub5', 15)
+schedule.move_operation('addsub2', 14)
+schedule.move_operation('cmul5', 15)
+schedule.move_operation('cmul3', 15)
+schedule.move_operation('cmul1', 14)
+schedule.move_operation('addsub1', 2)
+schedule.move_operation('cmul2', 16)
+schedule.move_operation('addsub4', 15)
+schedule.move_operation('out1', 15)
+schedule.move_operation('addsub1', 13)
+schedule.move_operation('cmul4', 18)
+schedule.move_operation('bfly1', 14)
+schedule.move_operation('bfly6', 14)
+schedule.move_operation('bfly5', 14)
+schedule.move_operation('in5', 1)
+schedule.move_operation('in3', 2)
+schedule.move_operation('in2', 3)
+schedule.move_operation('in4', 4)
+schedule.move_operation('bfly6', -5)
+schedule.move_operation('bfly5', -6)
+schedule.move_operation('addsub1', -1)
+schedule.move_operation('bfly1', -1)
+schedule.move_operation('bfly1', -4)
+schedule.move_operation('addsub1', -5)
+schedule.move_operation('addsub4', -6)
+schedule.move_operation('cmul4', -10)
+schedule.move_operation('cmul2', -7)
+schedule.move_operation('cmul1', -2)
+schedule.move_operation('cmul3', -6)
+schedule.move_operation('cmul5', -5)
+schedule.move_operation('cmul1', -3)
+schedule.move_operation('cmul5', -1)
+schedule.set_schedule_time(13)
+schedule.move_operation('bfly5', -6)
+schedule.move_operation('bfly6', -1)
+schedule.move_operation('cmul4', -6)
+schedule.move_operation('addsub1', 4)
+schedule.move_operation('cmul3', 4)
+schedule.move_operation('cmul1', 3)
+schedule.move_operation('bfly1', 3)
+schedule.move_operation('cmul2', 5)
+schedule.move_operation('cmul5', 4)
+schedule.move_operation('addsub4', 4)
+schedule.set_schedule_time(10)
+schedule.move_operation('addsub1', -1)
+schedule.move_operation('cmul4', 1)
+schedule.move_operation('addsub4', -1)
+schedule.move_operation('cmul5', -1)
+schedule.move_operation('cmul2', -2)
+schedule.move_operation('bfly6', -4)
+schedule.move_operation('bfly1', -1)
+schedule.move_operation('addsub1', -1)
+schedule.move_operation('cmul1', -1)
+schedule.move_operation('cmul2', -3)
+schedule.move_operation('addsub2', -1)
+schedule.move_operation('bfly2', -1)
+schedule.move_operation('bfly1', -1)
+schedule.move_operation('cmul1', -1)
+schedule.move_operation('addsub2', -1)
+schedule.move_operation('addsub4', -1)
+schedule.move_operation('addsub4', -3)
+schedule.move_operation('cmul4', -1)
+schedule.move_operation('bfly1', -2)
+schedule.move_operation('cmul2', -1)
+schedule.move_operation('cmul1', -2)
+schedule.move_operation('cmul5', -4)
+schedule.move_operation('cmul1', 1)
+schedule.move_operation('cmul3', -5)
+schedule.move_operation('cmul5', 2)
+schedule.move_operation('addsub3', -3)
+schedule.move_operation('addsub1', -3)
+schedule.move_operation('addsub2', -1)
+schedule.move_operation('addsub3', -4)
+schedule.move_operation('bfly2', -2)
+schedule.move_operation('addsub5', -3)
+schedule.move_operation('bfly3', -2)
+schedule.show()
+
+# Extract memory variables and operation executions
+operations = schedule.get_operations()
+adders = operations.get_by_type_name(AddSub.type_name())
+adders.show(title="AddSub executions")
+mults = operations.get_by_type_name('cmul')
+mults.show(title="Multiplier executions")
+butterflies = operations.get_by_type_name(Butterfly.type_name())
+butterflies.show(title="Butterfly executions")
+inputs = operations.get_by_type_name('in')
+inputs.show(title="Input executions")
+outputs = operations.get_by_type_name('out')
+outputs.show(title="Output executions")
+
+addsub = ProcessingElement(adders, entity_name="addsub")
+butterfly = ProcessingElement(butterflies, entity_name="butterfly")
+multiplier = ProcessingElement(mults, entity_name="multiplier")
+pe_in = ProcessingElement(inputs, entity_name='input')
+pe_out = ProcessingElement(outputs, entity_name='output')
+
+mem_vars = schedule.get_memory_variables()
+mem_vars.show(title="All memory variables")
+direct, mem_vars = mem_vars.split_on_length()
+mem_vars.show(title="Non-zero time memory variables")
+mem_vars_set = mem_vars.split_on_ports(read_ports=1, write_ports=1, total_ports=2)
+
+memories = []
+for i, mem in enumerate(mem_vars_set):
+    memory = Memory(mem, memory_type="RAM", entity_name=f"memory{i}")
+    memories.append(memory)
+    mem.show(title=f"{memory.entity_name}")
+    memory.assign("left_edge")
+    memory.show_content(title=f"Assigned {memory.entity_name}")
+
+direct.show(title="Direct interconnects")
+
+arch = Architecture(
+    {addsub, butterfly, multiplier, pe_in, pe_out},
+    memories,
+    direct_interconnects=direct,
+)
+
+arch
diff --git a/examples/threepointwinograddft.py b/examples/threepointwinograddft.py
index 68b8afbd3997c4b05476fbedea19e8ca9b0ac6c5..f817b31aff51023e7a742714170b00d6acab4b9a 100644
--- a/examples/threepointwinograddft.py
+++ b/examples/threepointwinograddft.py
@@ -6,7 +6,8 @@ Three-point Winograd DFT
 
 from math import cos, pi, sin
 
-from b_asic.core_operations import Addition, ConstantMultiplication, Subtraction
+from b_asic.architecture import Architecture, Memory, ProcessingElement
+from b_asic.core_operations import AddSub, ConstantMultiplication
 from b_asic.schedule import Schedule
 from b_asic.signal_flow_graph import SFG
 from b_asic.special_operations import Input, Output
@@ -19,14 +20,14 @@ c31 = sin(u)
 in0 = Input("x0")
 in1 = Input("x1")
 in2 = Input("x2")
-a0 = in1 + in2
-a1 = in1 - in2
-a2 = a0 + in0
+a0 = AddSub(True, in1, in2)
+a1 = AddSub(False, in1, in2)
+a2 = AddSub(True, a0, in0)
 m0 = c30 * a0
 m1 = c31 * a1
-a3 = a2 + m0
-a4 = a3 + m1
-a5 = a3 - m1
+a3 = AddSub(True, a2, m0)
+a4 = AddSub(True, a3, m1)
+a5 = AddSub(False, a3, m1)
 out0 = Output(a2, "X0")
 out1 = Output(a4, "X1")
 out2 = Output(a5, "X2")
@@ -44,15 +45,87 @@ sfg
 # %%
 # Set latencies and execution times
 sfg.set_latency_of_type(ConstantMultiplication.type_name(), 2)
-sfg.set_latency_of_type(Addition.type_name(), 1)
-sfg.set_latency_of_type(Subtraction.type_name(), 1)
+sfg.set_latency_of_type(AddSub.type_name(), 1)
 sfg.set_execution_time_of_type(ConstantMultiplication.type_name(), 1)
-sfg.set_execution_time_of_type(Addition.type_name(), 1)
-sfg.set_execution_time_of_type(Subtraction.type_name(), 1)
+sfg.set_execution_time_of_type(AddSub.type_name(), 1)
 
 # %%
 # Generate schedule
 schedule = Schedule(sfg, cyclic=True)
 schedule.show()
 
-pc = schedule.get_memory_variables()
+# Reschedule to only use one AddSub and one multiplier
+schedule.set_schedule_time(10)
+schedule.move_operation('out2', 3)
+schedule.move_operation('out3', 4)
+schedule.move_operation('addsub5', 2)
+schedule.move_operation('addsub4', 3)
+schedule.move_operation('addsub3', 2)
+schedule.move_operation('cmul2', 2)
+schedule.move_operation('cmul1', 2)
+schedule.move_operation('out1', 5)
+schedule.move_operation('addsub1', 3)
+schedule.move_operation('addsub6', 2)
+schedule.move_operation('addsub2', 2)
+schedule.move_operation('in2', 1)
+schedule.move_operation('in3', 2)
+schedule.move_operation('cmul2', 1)
+schedule.move_operation('out3', 6)
+schedule.move_operation('out2', 6)
+schedule.move_operation('out1', 6)
+schedule.move_operation('addsub6', 1)
+schedule.move_operation('addsub4', 3)
+schedule.move_operation('addsub5', 4)
+schedule.move_operation('addsub4', 1)
+schedule.move_operation('addsub5', 4)
+schedule.move_operation('cmul2', 3)
+schedule.move_operation('addsub4', 2)
+schedule.move_operation('cmul2', 3)
+schedule.move_operation('addsub3', 5)
+schedule.set_schedule_time(6)
+schedule.move_operation('addsub1', 1)
+schedule.move_operation('addsub4', -1)
+schedule.move_operation('cmul2', -2)
+schedule.move_operation('addsub4', -1)
+schedule.move_operation('addsub1', -1)
+schedule.move_operation('addsub3', -1)
+schedule.move_operation('addsub5', -4)
+schedule.show()
+
+# Extract memory variables and operation executions
+operations = schedule.get_operations()
+adders = operations.get_by_type_name(AddSub.type_name())
+adders.show(title="AddSub executions")
+mults = operations.get_by_type_name('cmul')
+mults.show(title="Multiplier executions")
+inputs = operations.get_by_type_name('in')
+inputs.show(title="Input executions")
+outputs = operations.get_by_type_name('out')
+outputs.show(title="Output executions")
+
+addsub = ProcessingElement(adders, entity_name="addsub")
+multiplier = ProcessingElement(mults, entity_name="multiplier")
+pe_in = ProcessingElement(inputs, entity_name='input')
+pe_out = ProcessingElement(outputs, entity_name='output')
+
+mem_vars = schedule.get_memory_variables()
+mem_vars.show(title="All memory variables")
+direct, mem_vars = mem_vars.split_on_length()
+mem_vars.show(title="Non-zero time memory variables")
+mem_vars_set = mem_vars.split_on_ports(read_ports=1, write_ports=1, total_ports=2)
+
+memories = []
+for i, mem in enumerate(mem_vars_set):
+    memory = Memory(mem, memory_type="RAM", entity_name=f"memory{i}")
+    memories.append(memory)
+    mem.show(title=f"{memory.entity_name}")
+    memory.assign("left_edge")
+    memory.show_content(title=f"Assigned {memory.entity_name}")
+
+direct.show(title="Direct interconnects")
+
+arch = Architecture(
+    {addsub, multiplier, pe_in, pe_out}, memories, direct_interconnects=direct
+)
+
+arch