/*
 * Copyright (c) 2023-2025 LAAS/CNRS
 * All rights reserved.
 *
 * Redistribution  and  use  in  source  and binary  forms,  with  or  without
 * modification, are permitted provided that the following conditions are met:
 *
 *   1. Redistributions of  source  code must retain the  above copyright
 *      notice and this list of conditions.
 *   2. Redistributions in binary form must reproduce the above copyright
 *      notice and  this list of  conditions in the  documentation and/or
 *      other materials provided with the distribution.
 *
 * THE SOFTWARE  IS PROVIDED "AS IS"  AND THE AUTHOR  DISCLAIMS ALL WARRANTIES
 * WITH  REGARD   TO  THIS  SOFTWARE  INCLUDING  ALL   IMPLIED  WARRANTIES  OF
 * MERCHANTABILITY AND  FITNESS.  IN NO EVENT  SHALL THE AUTHOR  BE LIABLE FOR
 * ANY  SPECIAL, DIRECT,  INDIRECT, OR  CONSEQUENTIAL DAMAGES  OR  ANY DAMAGES
 * WHATSOEVER  RESULTING FROM  LOSS OF  USE, DATA  OR PROFITS,  WHETHER  IN AN
 * ACTION OF CONTRACT, NEGLIGENCE OR  OTHER TORTIOUS ACTION, ARISING OUT OF OR
 * IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
 *
 *                                           Anthony Mallet on Mon Apr 17 2023
 */
#include "autoconf.h"

#include "string.h"

#include "tk3-paparazzi.h"

/*
 * This implements the low-level software to control up to 8 ESC via the
 * servoa[1-4] and servob[1-4] pins on the Tawaki paparazzi board. The goal is
 * to be able to either send PWM or DSHOT at different rates, as well as
 * Bidirectional-DSHOT (hereafter BiDSHOT) developed by the BLHeli software
 * suite to benefit from high frequency feedback and implement a velocity
 * closed loop control. In order to minimize the required hardware resources,
 * DMA is used as much as possible.
 *
 * Design choices:
 *
 * Sending: TIM[13] PWM mode 1, preload enable, PWM/DSHOT update frequency.
 * Receiving: TIM[13] input capture mode, both edges, free running (max period).
 *
 * TIM4 running at PWM/DSHOT frequency (output) or 3/2 BiDSHOT frequency
 * (i.e. 20% faster than the 5/4 BiDSHOT frequency to allow some jitter on
 * input). TIM4_CH[12] trigger DMA1 transfer, PBURST 4, to/from
 * TIM[13]_CCR[1-4] registers via TIM[13]_DMAR and TIMx_DCR.
 *
 * In PWM mode, desired throttle is sent with no CPU overhead at the configured
 * frequency (20-490Hz). In DSHOT mode, the desired throttle is sent once
 * by the hardware-independent layer, at the sender's frequency. For BiDSHOT,
 * one DMA transfer complete interrupt is triggered after the sending to
 * reconfigure timers for input mode.
 *
 * Notes about the design choices:
 *
 * servo[ab][1-4] are connected to GPIOs that map to the 4 channels CH[1-4] of
 * TIM1 and TIM3. To send PWM/DSHOT, only 2 DMA1 channels (TIM[13]_UP) are
 * required, by using the timer 'DMA burst' (TIMx_DMAR/DCR) feature to write to
 * TIMx_CCR[1-4] at each timer update event. However, capturing the BiDSHOT
 * telemetry is more complex. By using TIMx input capture mode on both rising
 * and falling edges, and a higher frequency than the theoretical signal
 * period, one could use the same DMA burst feature to transfer TIMx_CCR[1-4]
 * at update event. But there is no robust way to distinguish between a true,
 * new capture event and an old one that would by chance have occurred at
 * the same timestamp:
 *
 *   ---+   +---+---+---+  <-- input signal
 *      |   |           |
 *      +---+           +---
 *  --|-1-|-2-|---|---|-3-|-----------> TIMx (|=TIMx_UP, [1-3]=capture event)
 *
 * In this example, reading CCRn for event #3 is ambiguous, as it would have
 * the same timestamp as event 2 and thus be indistinguishable from the two
 * non-events in between. Reading TIMx_SR along with TIMx_CCRn with a single
 * DMA burst transfer would solve this, however it is impractical as this
 * register is at a very different offset from CCR[1-4] and this would consume
 * lots of memory (48 bytes for each period, so at least 1.5kB for a whole
 * BiDSHOT message including some temporal safety margins).
 *
 * The chosen design uses instead one extra timer to set the pace of the
 * transfers and configures TIM[13] to free running, so that the timer will
 * not overflow during the whole transfer. This makes captured events in
 * TIMx_CCRn unambiguous. The pacer needs two DMA streams to read from
 * TIM[13]_CCR[1-4]. TIM4 in combination with DMA1 TIM4_CH1 and TIM4_CH2,
 * can conveniently be chosen. Fortunately, those channels can access
 * TIM[13]_DMAR and the internal timer index works even though it is accessed
 * by an unrelated DMA transfer. The DMA is not automatically 'bursted' by the
 * TIM[13] timers though, but this can be addressed by configuring manually a
 * PBURST of 4 transfers in DMA_CR.
 *
 * Finally, the same strategy can be used to send the PWM/DSHOT
 * messages. Although this is not necessary, this conveniently minimizes the
 * number of required DMA channels.
 */

/* TIM1, TIM3 and TIM4 are used by this file: ensure this triggers a
 * redefinition error if any of these macros is already defined */
#define STM32_TIM1_IS_USED	tk3srv
#define STM32_TIM3_IS_USED	tk3srv
#define STM32_TIM4_IS_USED	tk3srv


/* DMA1 buffers: 2 streams, 4 servos/stream, 40 PWM/DSHOT bits max.
 *
 * Samples are interleaved: each group of 4 consecutive half-words holds one
 * value per timer channel (CCR1..CCR4).
 * IOMEM_BITS is the number of such groups transferred when receiving,
 * IOMEM_MAXDTIME is the upper bound accepted for iocfg.dtime.
 * NOTE(review): tk3_nocache() is defined elsewhere; presumably it places the
 * buffer in non-cacheable memory suitable for DMA - confirm. */
#define IOMEM_BITS	40
#define IOMEM_MAXDTIME	24
static uint16_t tk3_nocache(iomem[2][4 * (IOMEM_BITS + IOMEM_MAXDTIME)]);

/* I/O lines */
static const struct { ioline_t line; iomode_t mode; } iolines[] = {
  { .line = PAL_LINE(GPIOE,  9), .mode = PAL_MODE_ALTERNATE(1) },
  { .line = PAL_LINE(GPIOE, 11), .mode = PAL_MODE_ALTERNATE(1) },
  { .line = PAL_LINE(GPIOE, 13), .mode = PAL_MODE_ALTERNATE(1) },
  { .line = PAL_LINE(GPIOE, 14), .mode = PAL_MODE_ALTERNATE(1) },
  { .line = PAL_LINE(GPIOA,  6), .mode = PAL_MODE_ALTERNATE(2) },
  { .line = PAL_LINE(GPIOA,  7), .mode = PAL_MODE_ALTERNATE(2) },
  { .line = PAL_LINE(GPIOB,  0), .mode = PAL_MODE_ALTERNATE(2) },
  { .line = PAL_LINE(GPIOB,  1), .mode = PAL_MODE_ALTERNATE(2) }
};

/* I/O devices */
static struct iodev {
  TIM_TypeDef * const tim;		/* TIMx */
  const stm32_dma_stream_t *dma;	/* DMA stream */
  const uint32_t sid, mux;		/* DMA stream id and mux */
  uint16_t * const m0ar;		/* DMA source address */
  struct { uint32_t in, out; } CR;	/* DMA_CR mode */
} iodev[] = {
  { .tim = TIM1,
    .sid = STM32_GPT_TIM4_CH1_DMA_STREAM, .mux = STM32_DMAMUX1_TIM4_CH1,
    .m0ar = iomem[0] },
  { .tim = TIM3,
    .sid = STM32_GPT_TIM4_CH2_DMA_STREAM, .mux = STM32_DMAMUX1_TIM4_CH2,
    .m0ar = iomem[1] }
};

/* since all timer clocks are configured at 240MHz in mcuconf.h and this is not
 * going to change, it is assumed for simplicity that TIM1 and TIM3 clocks are
 * the same, even though they are not on the same APB bus. The build fails if
 * this assumption does not hold. */
#define TK3SRV_TIMCLK STM32_TIMCLK1
#if STM32_TIMCLK1 != STM32_TIMCLK2
# error "TIM1 and TIM3 clock not equal"
#endif

/* I/O configuration */
static struct {
  struct { uint16_t in, out; } ARR;	/* TIMx_ARR for in/out */
  struct { uint16_t out; } CCER;	/* TIMx_CCER for output */
  struct { uint16_t in, out[2]; } CCR;	/* TIMx_CCRn for low(0)/high(1) bits */
  uint16_t dtime;			/* delay in bits before rcv (bidshot) */

  void (*send)(uint16_t throt[8]);	/* mode-dependant functions */
  uint16_t (*msg)(uint16_t throt, bool tlm);
} iocfg;

/* timer/DMA direction switching */
static void	tk3srv_iodevout(struct iodev *dev);
static void	tk3srv_iodevin(struct iodev *dev);

/* mode-dependent senders, installed in iocfg.send */
static void	tk3srv_sendpwm(uint16_t throt[8]);
static void	tk3srv_senddshot(uint16_t throt[8]);

/* BiDSHOT reception, started from the DMA transfer complete callback */
static void	tk3srv_recv(void);
static void	tk3srv_dmatc(void *arg, uint32_t flags);


/* --- tk3srv_hwinit ------------------------------------------------------- */

/*
 * Configure TIM1/TIM3 (servo outputs), TIM4 (DMA pacer), the DMA streams and
 * the I/O lines for the requested mode.
 *
 * mode: TK3SRV_OFF, TK3SRV_PWM, TK3SRV_DSHOT or TK3SRV_BIDSHOT.
 * freq: PWM update rate in Hz for TK3SRV_PWM, DSHOT bit rate in kHz for
 *       TK3SRV_DSHOT and TK3SRV_BIDSHOT. Unused for TK3SRV_OFF. Must be
 *       non-zero in the other modes.
 *
 * Returns 0 on success, 1 on error (DMA stream allocation failure or dead
 * time not fitting in the DMA buffers).
 */
int
tk3srv_hwinit(enum tk3srvmode mode, uint32_t freq)
{
  uint32_t p;
  int i;

  /* enable timer clocks */
  rccEnableTIM1(true);
  rccResetTIM1();
  rccEnableTIM3(true);
  rccResetTIM3();
  rccEnableTIM4(true);
  rccResetTIM4();
  TIM1->BDTR = STM32_TIM_BDTR_AOE;

  /* timer frequencies */
  switch(mode) {
    case TK3SRV_OFF:
      /* nothing else to configure: make sure no sender from a previous
       * configuration stays installed */
      iocfg.send = NULL;
      iocfg.msg = NULL;
      return 0;

    case TK3SRV_PWM:
      /* minimum clock prescaler to get desired freq within 16 bits ARR */
      p = (TK3SRV_TIMCLK - 1) / (freq<<16) + 1;
      iodev[0].tim->PSC = iodev[1].tim->PSC = p - 1;

      /* ARR value to achieve desired freq, rounded to nearest, effectively
       * computing ARR = CLK_INT/freq = (TK3SRV_TIMCLK/p)/freq */
      p *= freq;
      p = (TK3SRV_TIMCLK + p/2)/p;

      iocfg.ARR.out = p - 1;
      iocfg.ARR.in = 0; /* N/A */

      /* min 1ms/max 2ms pulse width, rounded to nearest, i.e.:
       * CCR = 0.001 / (1/freq) * (ARR+1) = (ARR+1) * freq / 1000 */
      p *= freq;
      iocfg.CCR.out[0] = (p + 500)/ 1000; /* 1ms */
      iocfg.CCR.out[1] = iocfg.CCR.out[0]; /* delta to reach 2ms from 1ms */

      iocfg.CCR.in = 0; /* N/A */
      break;

    case TK3SRV_DSHOT:
    case TK3SRV_BIDSHOT:
      /* min freq is at least 150kHz, this means PSC = 0 at 240MHz */
      iodev[0].tim->PSC = iodev[1].tim->PSC = 0;

      /* ARR value to achieve the desired freq, rounded to nearest, i.e.:
       * ARR = CLK_INT/freq = TK3SRV_TIMCLK/freq */
      p = freq * 1000;
      p = (TK3SRV_TIMCLK + p/2) / p;
      iocfg.ARR.out = p - 1;

      /* BiDSHOT telemetry frequency is 5/4 DSHOT.
       * Reading at 3/2 DSHOT frequency will allow some jitter. */
      iocfg.ARR.in = p * 2/3 - 1;

      /* DSHOT bits are 37.5% (3/8) for 0/low and 75% (3/4) for 1/high. */
      iocfg.CCR.out[0] = 3*p/8;
      iocfg.CCR.out[1] = 3*p/4;

      /* BiDSHOT telemetry bit length is 4/5 period */
      iocfg.CCR.in = 4*p/5;
      break;
  }

  /* output mode, 4 channels enabled */
  iocfg.CCER.out =
    STM32_TIM_CCER_CC1E | STM32_TIM_CCER_CC2E |
    STM32_TIM_CCER_CC3E | STM32_TIM_CCER_CC4E;
  if (mode == TK3SRV_BIDSHOT) {
    /* reverted output: active low */
    iocfg.CCER.out |=
      STM32_TIM_CCER_CC1P | STM32_TIM_CCER_CC2P |
      STM32_TIM_CCER_CC3P | STM32_TIM_CCER_CC4P;
  }

  /* setup DMA channels */
  for(i = 0; i < 2; i++) {
    switch(mode) {
      case TK3SRV_OFF:
      case TK3SRV_PWM:
        /* no DMA required for this mode */
        iodev[i].dma = NULL;
        break;

      case TK3SRV_DSHOT:
      case TK3SRV_BIDSHOT:
        /* only the second stream gets a callback: it is triggered last
         * (TIM4_CH2 is shifted after TIM4_CH1) so its completion marks the
         * end of the whole message */
        iodev[i].dma = dmaStreamAlloc(
          iodev[i].sid, STM32_GPT_TIM4_DMA_IRQ_PRIORITY,
          mode == TK3SRV_BIDSHOT && i == 1 ? tk3srv_dmatc : NULL, NULL);
        if (!iodev[i].dma) {
          tk3fb_on(TK3FB_ESC_ERR);
          tk3msg_log(TK3CH_USB0, "EDMA configuration error");
          return 1;
        }
        dmaSetRequestSource(iodev[i].dma, iodev[i].mux);

        /* mode: set TCIE for BiDSHOT to reprogram timers for input.
         * This must be a plain assignment: iodev[] is static, so OR-ing
         * would keep bits (e.g. TCIE) from a previous configuration. */
        iodev[i].CR.out =
          STM32_DMA_CR_PSIZE_HWORD | STM32_DMA_CR_MSIZE_HWORD |
          STM32_DMA_CR_MINC | STM32_DMA_CR_DIR_M2P |
          STM32_DMA_CR_MBURST_INCR4 |
          STM32_DMA_CR_PBURST_INCR4 | /* 4 bursts to TIM->DMAR */
          STM32_DMA_CR_PL(STM32_GPT_TIM4_DMA_PRIORITY) |
          (mode == TK3SRV_BIDSHOT && i == 1 ? STM32_DMA_CR_TCIE : 0) |
          STM32_DMA_CR_EN;
        iodev[i].CR.in =
          STM32_DMA_CR_PSIZE_HWORD | STM32_DMA_CR_MSIZE_HWORD |
          STM32_DMA_CR_MINC | STM32_DMA_CR_DIR_P2M |
          STM32_DMA_CR_MBURST_INCR4 |
          STM32_DMA_CR_PBURST_INCR4 | /* 4 bursts from TIM->DMAR */
          STM32_DMA_CR_PL(STM32_GPT_TIM4_DMA_PRIORITY) |
          STM32_DMA_CR_EN;

        /* full FIFO, 4 transfers from/to TIMx_DMAR */
        dmaStreamSetFIFO(
          iodev[i].dma, STM32_DMA_FCR_DMDIS | STM32_DMA_FCR_FTH_FULL);
        dmaStreamSetMemory0(iodev[i].dma, iodev[i].m0ar);
        dmaStreamSetPeripheral(iodev[i].dma, &iodev[i].tim->DMAR);

        /* DMA burst: base register is CCR1 (index relative to CR1),
         * burst length of 4 registers (CCR1..CCR4) */
        iodev[i].tim->DCR =
          ((&iodev[i].tim->CCR1 - &iodev[i].tim->CR1) << TIM_DCR_DBA_Pos) |
          (3 << TIM_DCR_DBL_Pos);
        break;
    }
  }

  /* configure pacing timer */
  TIM4->CR1 = STM32_TIM_CR1_URS /* no DMA with UG bit */;
  TIM4->CR2 = STM32_TIM_CR2_MMS(1); /* TRGO enable */
  TIM4->PSC = 0; /* same tick as TIM[13] in dshot mode */

  TIM4->CCER = STM32_TIM_CCER_CC1E | STM32_TIM_CCER_CC2E;
  TIM4->CCR1 = 1; /* arbitrary trigger far from next TIM[13]_UP */
  TIM4->CCR2 = 9; /* shift CH2 to avoid DMA contention with CH1 */
  TIM4->DIER = 0;

  /* dshot dead time: 18µs delay (theoretical 30µs) to allow some jitter */
  iocfg.dtime =
    mode == TK3SRV_BIDSHOT ? 18 * freq / 1000 : 1 /* reset bit */;
  if (iocfg.dtime > IOMEM_MAXDTIME) {
    tk3msg_log(TK3CH_USB0, "EDMA buffers too small");
    return 1;
  }

  /* synchronize output timers to pacing timer: TIM4 -> TIM3 -> TIM1 */
  TIM1->SMCR =
    STM32_TIM_SMCR_SMS(8 /* reset + trigger */) |
    STM32_TIM_SMCR_TS(2 /* TIM3 */);
  TIM3->SMCR =
    STM32_TIM_SMCR_SMS(8 /* reset + trigger */) |
    STM32_TIM_SMCR_TS(3 /* TIM4 */) |
    STM32_TIM_SMCR_MSM;

  /* configure lines */
  for(i = 0; i < 8; i++)
    palSetLineMode(iolines[i].line, iolines[i].mode);

  /* configure for output by default, only BiDSHOT will alter this */
  tk3srv_iodevout(&iodev[0]);
  tk3srv_iodevout(&iodev[1]);

  /* set mode-dependent functions */
  switch(mode) {
    case TK3SRV_OFF:     iocfg.send = NULL; break;
    case TK3SRV_PWM:     iocfg.send = tk3srv_sendpwm; break;
    case TK3SRV_DSHOT:
    case TK3SRV_BIDSHOT: iocfg.send = tk3srv_senddshot; break;
  }
  switch(mode) {
    case TK3SRV_OFF:
    case TK3SRV_PWM:     iocfg.msg = NULL; break;
    case TK3SRV_DSHOT:   iocfg.msg = tk3srv_dshotmsg; break;
    case TK3SRV_BIDSHOT: iocfg.msg = tk3srv_bidshotmsg; break;
  }

  return 0;
}


/* --- tk3srv_hwfini ------------------------------------------------------- */

/*
 * Release the hardware: uninstall the sender, free the DMA streams, stop
 * TIM1/TIM3/TIM4, disable their clocks and switch the servo lines back to
 * plain inputs.
 *
 * Always returns 0.
 */
int
tk3srv_hwfini()
{
  int i;

  iocfg.send = NULL;
  nvicDisableVector(STM32_TIM4_NUMBER);

  for(i = 0; i < 2; i++) {
    if (iodev[i].dma) {
      dmaStreamDisable(iodev[i].dma);
      dmaStreamFree(iodev[i].dma);

      /* forget the released stream: tk3srv_hwinit(TK3SRV_OFF) does not
       * reset this pointer, so leaving it set would make a later
       * tk3srv_hwfini() or tk3srv_reset() operate on a stream that is no
       * longer owned by this driver */
      iodev[i].dma = NULL;
    }
    iodev[i].tim->CR1 &= ~TIM_CR1_CEN;
  }
  TIM4->CR1 &= ~TIM_CR1_CEN;

  rccDisableTIM4();
  rccDisableTIM3();
  rccDisableTIM1();

  for(i = 0; i < 8; i++)
    palSetLineMode(iolines[i].line, PAL_MODE_INPUT);

  return 0;
}


/* --- tk3srv_iodevout ----------------------------------------------------- */

/* configure timers for output */

/*
 * Stop the timer of `dev` and set it up for output: update period
 * iocfg.ARR.out, all four channels in PWM mode 1 with preload and a zero
 * compare value. If a DMA stream is attached, it is re-armed in output mode
 * for one message of 16 bits plus iocfg.dtime.
 * The timer counter is left disabled by this function.
 */
static void
tk3srv_iodevout(struct iodev *dev)
{
  /* set update period */
  dev->tim->CR1 &= ~STM32_TIM_CR1_CEN;
  dev->tim->ARR = iocfg.ARR.out;

  /* forced inactive output (must switch off CCER per RM0433) */
  dev->tim->CCER = 0;
  dev->tim->CCMR1 = STM32_TIM_CCMR1_OC1M(4) | STM32_TIM_CCMR1_OC2M(4);
  dev->tim->CCMR2 = STM32_TIM_CCMR2_OC3M(4) | STM32_TIM_CCMR2_OC4M(4);
  dev->tim->CCER = iocfg.CCER.out;

  /* reset compare value (done while preload is still disabled) */
  dev->tim->CCR1 = dev->tim->CCR2 = dev->tim->CCR3 = dev->tim->CCR4 = 0;

  /* all channels PWM mode 1, preload enabled */
  dev->tim->CCMR1 =
    STM32_TIM_CCMR1_OC1M(6) | STM32_TIM_CCMR1_OC1PE |
    STM32_TIM_CCMR1_OC2M(6) | STM32_TIM_CCMR1_OC2PE;
  dev->tim->CCMR2 =
    STM32_TIM_CCMR2_OC3M(6) | STM32_TIM_CCMR2_OC3PE |
    STM32_TIM_CCMR2_OC4M(6) | STM32_TIM_CCMR2_OC4PE;

  /* setup DMA: 4 half-words (one per channel) for each of the 16 message
   * bits and each of the iocfg.dtime trailing bits */
  if (dev->dma) {
    dmaStreamDisable(dev->dma); /* clear interrupts per AN4031 */
    dmaStreamSetTransactionSize(dev->dma, (16 + iocfg.dtime) * 4);
    dmaStreamSetMode(dev->dma, dev->CR.out);
  }
}


/* --- tk3srv_iodevin ------------------------------------------------------ */

/* configure timers for input */

/*
 * Stop the timer of `dev` and set it up for input: maximum period (free
 * running), all four channels in input capture mode on both edges, capture
 * registers preset to 0xffff. If a DMA stream is attached, it is re-armed in
 * input mode for IOMEM_BITS samples of the four capture registers.
 * The timer counter is left disabled by this function.
 */
static void
tk3srv_iodevin(struct iodev *dev)
{
  /* reset update period (free running) */
  dev->tim->CR1 &= ~STM32_TIM_CR1_CEN;
  dev->tim->ARR = 0xffff;

  /* disable all channels and reset capture value: 0xffff is the value
   * tk3srv_read() treats as "no event captured yet" */
  dev->tim->CCER = 0;
  dev->tim->CCMR1 = dev->tim->CCMR2 = 0;
  dev->tim->CCR1 = dev->tim->CCR2 = dev->tim->CCR3 = dev->tim->CCR4 = 0xffff;

  /* configure channels to input, no filter */
  dev->tim->CCMR1 =
    STM32_TIM_CCMR1_IC1F(0) | STM32_TIM_CCMR1_CC1S(1) |
    STM32_TIM_CCMR1_IC2F(0) | STM32_TIM_CCMR1_CC2S(1);
  dev->tim->CCMR2 =
    STM32_TIM_CCMR2_IC3F(0) | STM32_TIM_CCMR2_CC3S(1) |
    STM32_TIM_CCMR2_IC4F(0) | STM32_TIM_CCMR2_CC4S(1);

  /* enable all channels, capture both edges */
  dev->tim->CCER =
    STM32_TIM_CCER_CC1NP | STM32_TIM_CCER_CC1P | STM32_TIM_CCER_CC1E |
    STM32_TIM_CCER_CC2NP | STM32_TIM_CCER_CC2P | STM32_TIM_CCER_CC2E |
    STM32_TIM_CCER_CC3NP | STM32_TIM_CCER_CC3P | STM32_TIM_CCER_CC3E |
    STM32_TIM_CCER_CC4NP | STM32_TIM_CCER_CC4P | STM32_TIM_CCER_CC4E;

  /* setup DMA: IOMEM_BITS samples of 4 half-words (one per channel) */
  if (dev->dma) {
    dmaStreamDisable(dev->dma); /* clear interrupts per AN4031 */
    dmaStreamSetTransactionSize(dev->dma, IOMEM_BITS * 4);
    dmaStreamSetMode(dev->dma, dev->CR.in);
  }
}


/* --- tk3srv_reset -------------------------------------------------------- */

/*
 * Zero the compare registers of both servo timers, then switch both timers
 * to input mode through tk3srv_iodevin(), which stops their counters.
 *
 * NOTE(review): after this call the channels are in input capture mode.
 * tk3srv_senddshot() reconfigures them for output before sending, but
 * tk3srv_sendpwm() does not - confirm that tk3srv_hwinit() is always called
 * again before sending in PWM mode.
 */
void
tk3srv_reset()
{
  int i;

  for(i = 0; i < 2; i++) {
    iodev[i].tim->CCR1 = 0;
    iodev[i].tim->CCR2 = 0;
    iodev[i].tim->CCR3 = 0;
    iodev[i].tim->CCR4 = 0;
    tk3srv_iodevin(&iodev[i]);
  }
}


/* --- tk3srv_send --------------------------------------------------------- */

/*
 * PWM sender: load the 8 pulse widths into the compare registers of both
 * servo timers. throt[0..3] go to the first timer, throt[4..7] to the second.
 * Each pulse width is iocfg.CCR.out[0] plus iocfg.CCR.out[1] scaled by
 * throt/2^15.
 */
static void
tk3srv_sendpwm(uint16_t throt[8])
{
  int d;

  /* freeze update events while the compare registers are being written, so
   * that all set points are taken into account in the same period */
  iodev[0].tim->CR1 = iodev[1].tim->CR1 =
    STM32_TIM_CR1_CEN | STM32_TIM_CR1_UDIS;

  for(d = 0; d < 2; d++) {
    iodev[d].tim->CCR1 =
      ((iocfg.CCR.out[1] * throt[4*d + 0]) >> 15) + iocfg.CCR.out[0];
    iodev[d].tim->CCR2 =
      ((iocfg.CCR.out[1] * throt[4*d + 1]) >> 15) + iocfg.CCR.out[0];
    iodev[d].tim->CCR3 =
      ((iocfg.CCR.out[1] * throt[4*d + 2]) >> 15) + iocfg.CCR.out[0];
    iodev[d].tim->CCR4 =
      ((iocfg.CCR.out[1] * throt[4*d + 3]) >> 15) + iocfg.CCR.out[0];
  }

  /* counters running, update events allowed again */
  iodev[0].tim->CR1 = iodev[1].tim->CR1 = STM32_TIM_CR1_CEN;
}

/*
 * DSHOT/BiDSHOT sender: encode the 8 throttles into the DMA buffers and
 * start the pacing timer to clock them out.
 *
 * throt[] is overwritten in place with the 16 bit messages returned by
 * iocfg.msg(). throt[0..3] are sent by the first timer, throt[4..7] by the
 * second one.
 */
static void
tk3srv_senddshot(uint16_t throt[8])
{
  uint16_t *dst, mask;
  int d, i;

  /* encode throttle message */
  for(i = 0; i < 8; i++)
    throt[i] = iocfg.msg(throt[i], 0);

  /* encode DMA pattern: one compare value per channel and per bit */
  for(d = 0; d < 2; d++, throt += 4) {
    /* 16 bits from MSB to LSB */
    for(dst = iodev[d].m0ar, mask = 0x8000; mask; mask >>= 1)
      for(i = 0; i < 4; i++)
        *dst++ = iocfg.CCR.out[(throt[i] & mask) != 0];

    /* final reset bit + dead time: zero compare value on all 4 channels.
     * Plain half-word stores are used so that the buffer is only ever
     * accessed through its declared element type. */
    for(i = 0; i < 1 + iocfg.dtime; i++, dst += 4)
      dst[0] = dst[1] = dst[2] = dst[3] = 0;
  }

  /* reconfigure TIM[13] and DMA for output */
  TIM4->CR1 &= ~STM32_TIM_CR1_CEN;
  TIM4->DIER = 0; /* clear DMA bits to avoid spurious trigger */
  tk3srv_iodevout(&iodev[0]);
  tk3srv_iodevout(&iodev[1]);

  /* start pacer for a pulse of 16 bits plus dead time for bidshot */
  TIM4->ARR = iocfg.ARR.out;
  TIM4->EGR = TIM_EGR_UG; /* reset */

  TIM4->DIER = TIM_DIER_CC1DE | TIM_DIER_CC2DE;
  TIM4->CR1 |= STM32_TIM_CR1_CEN;
}

/*
 * Send the 8 throttles with the sender installed for the current mode.
 * Does nothing when no sender is installed. Always returns 0.
 */
int
tk3srv_send(uint16_t throt[8])
{
  if (!iocfg.send)
    return 0;

  iocfg.send(throt);
  return 0;
}


/* --- tk3srv_recv --------------------------------------------------------- */

/*
 * Switch both servo timers and their DMA streams to input mode and restart
 * the pacing timer at the input sampling period (iocfg.ARR.in). Called from
 * the DMA transfer complete callback.
 */
static void
tk3srv_recv()
{
  /* reconfigure TIM[13] and DMA for input: the pacer is stopped and its DMA
   * requests are masked while the streams are being re-armed */
  TIM4->CR1 &= ~STM32_TIM_CR1_CEN;
  TIM4->DIER = 0; /* clear DMA bits to avoid spurious trigger */
  tk3srv_iodevin(&iodev[0]);
  tk3srv_iodevin(&iodev[1]);

  /* start pacer for a pulse of 40 bits */
  TIM4->ARR = iocfg.ARR.in;
  TIM4->EGR = TIM_EGR_UG; /* reset */

  TIM4->DIER = TIM_DIER_CC1DE | TIM_DIER_CC2DE;
  TIM4->CR1 |= STM32_TIM_CR1_CEN;
}


/* --- tk3srv_read --------------------------------------------------------- */

/*
 * Rebuild the raw telemetry frames from the capture timestamps sampled by
 * DMA. tlm[0..3] come from the first timer, tlm[4..7] from the second one.
 *
 * For each channel, the sampled timestamps are scanned for changes. The
 * distance between two successive distinct timestamps, divided by the bit
 * length iocfg.CCR.in and rounded to nearest, gives the number of identical
 * bits to append. The bit value starts at 0 and toggles at each event. At
 * most 21 bits are stored in each tlm[] entry; an entry is 0 when no event
 * was captured.
 *
 * The last sample of each channel is overwritten with an end marker.
 */
void
tk3srv_read(uint32_t tlm[8])
{
  const uint16_t *ev, *n, *end;
  int b, c, d, e, i;

  /* without a configured bit length (PWM mode or no initialization) there
   * is nothing to decode: report empty frames instead of dividing by zero */
  if (!iocfg.CCR.in) {
    for(i = 0; i < 8; i++) tlm[i] = 0;
    return;
  }

  for(d = 0; d < 2; d++) {
    for(i = 0; i < 4; i++, tlm++) {
      /* samples of one channel are 4 half-words apart */
      ev = &iodev[d].m0ar[i];
      end = &ev[(IOMEM_BITS-1) * 4];

      /* add virtual infinite timestamp marker at the end: this guarantees
       * that the scan for the next event below always terminates */
      iodev[d].m0ar[i + (IOMEM_BITS-1) * 4] = 0xffff;

      /* skip until first event */
      while(ev < end && *ev == 0xffff) ev += 4;

      /* process events */
      e = 0;
      *tlm = 0;
      for(b = 0; ev < end && b < 21; e = !e) {
        /* get next event length */
        for(n = ev + 4; *n == *ev; n += 4) /* empty body */;
        c = (*n - *ev + iocfg.CCR.in/2) / iocfg.CCR.in;
        ev = n;

        /* unpack: append c bits of value e, without exceeding 21 bits */
        if (b + c > 21) c = 21 - b;
        b += c;
        *tlm = (*tlm << c) | ((e << c) - e);
      }
    }
  }
}


/* --- STM32_DMA1_CH1_HANDLER ---------------------------------------------- */

/* DMA1 TC is used to reconfigure timers for input in BiDSHOT mode */

/*
 * DMA transfer complete callback: the outgoing message has been handed over
 * to the timers, switch to reception. Both arguments are unused.
 */
static void
tk3srv_dmatc(void *arg, uint32_t flags)
{
  (void)arg;
  (void)flags;

  tk3srv_recv();
}
