@InProceedings{Jakson05, title = "{R}16: a {N}ew {T}ransputer {D}esign for {FPGA}s", author= "Jakson, John", editor= "Broenink, Jan F. and Roebbers, Herman and Sunter, Johan P. E. and Welch, Peter H. and Wood, David C.", pages = "335--362", booktitle= "{C}ommunicating {P}rocess {A}rchitectures 2005", isbn= "978-1-58603-561-7", year= "2005", month= "sep", abstract= "This paper describes the ongoing development of a new FPGA hosted Transputer using a Load Store RISC style Multi Threaded Architecture (MTA). The memory system throughput is emphasized as much as the processor throughput and uses the recently developed Micron 32MByte RLDRAM which can start fully random memory cycles every 3.3ns with 20ns latency when driven by an FPGA controller. The R16 shares an object oriented Memory Manager Unit (MMU) amongst multiple low cost Processor Elements (PEs) until the MMU throughput limit is reached. The PE has been placed and routed at over 300MHz in a Xilinx Virtex-II Pro device and uses around 500 FPGA basic cells and 1 Block RAM. The 15 stage pipeline uses 2 clocks per instruction to greatly simplify the hardware design which allows for twice the clock frequency of other FPGA processors. There are instruction and cycle accurate simulators as well as a C compiler in development. The compiler can now emit optimized small functions needed for further hardware development although compiling itself requires much work. Some occam and Verilog language components will be added to the C base to allow a mixed occam and event driven processing model. Eventually it is planned to allow occam or Verilog source to run as software code or be placed as synthesized co processor hardware attached to the MMU." }