From daf7b3f4a5b6a88e905ab59f483b7478e7511817 Mon Sep 17 00:00:00 2001
From: Tyler Goodlet <jgbt@protonmail.com>
Date: Wed, 21 Dec 2022 17:30:08 -0500
Subject: [PATCH] Only accept 6 tries for the same duplicate hist frame

When we see multiple history frames that are duplicate to the request
set, bail re-trying after a number of tries (6 just cuz) and return
early from the tsdb backfill loop; presume that this many duplicates
means we've hit the beginning of history. Use a `collections.Counter`
for the duplicate counts. Make sure and warn log in such cases.
---
 piker/data/feed.py | 38 ++++++++++++++++++++++++++++----------
 1 file changed, 28 insertions(+), 10 deletions(-)

diff --git a/piker/data/feed.py b/piker/data/feed.py
index aa2a6bad..744d301f 100644
--- a/piker/data/feed.py
+++ b/piker/data/feed.py
@@ -21,7 +21,10 @@ This module is enabled for ``brokerd`` daemons.
 
 """
 from __future__ import annotations
-from collections import defaultdict
+from collections import (
+    defaultdict,
+    Counter,
+)
 from contextlib import asynccontextmanager as acm
 from datetime import datetime
 from functools import partial
@@ -374,8 +377,9 @@ async def start_backfill(
         # erlangs = config.get('erlangs', 1)
 
         # avoid duplicate history frames with a set of datetime frame
-        # starts.
-        starts: set[datetime] = set()
+        # starts and associated counts of how many duplicates we see
+        # per time stamp.
+        starts: Counter[datetime] = Counter()
 
         # inline sequential loop where we simply pass the
         # last retrieved start dt to the next request as
@@ -403,14 +407,24 @@ async def start_backfill(
                 # request loop until the condition is resolved?
                 return
 
-            if next_start_dt in starts:
+            if (
+                next_start_dt in starts
+                and starts[next_start_dt] <= 6
+            ):
                 start_dt = min(starts)
                 print(f"SKIPPING DUPLICATE FRAME @ {next_start_dt}")
+                starts[start_dt] += 1
                 continue
 
+            elif starts[next_start_dt] > 6:
+                log.warning(
+                    f'NO-MORE-DATA: backend {mod.name} before {next_start_dt}?'
+                )
+                return
+
             # only update new start point if not-yet-seen
             start_dt = next_start_dt
-            starts.add(start_dt)
+            starts[start_dt] += 1
 
             assert array['time'][0] == start_dt.timestamp()
 
@@ -656,10 +670,10 @@ async def tsdb_backfill(
 
         # Load TSDB history into shm buffer (for display) if there is
         # remaining buffer space.
+
         if (
             len(tsdb_history)
         ):
-
             # load the first (smaller) bit of history originally loaded
             # above from ``Storage.load()``.
             to_push = tsdb_history[-prepend_start:]
@@ -682,14 +696,12 @@ async def tsdb_backfill(
 
             # load as much from storage into shm possible (depends on
             # user's shm size settings).
-            while (
-                shm._first.value > 0
-            ):
+            while shm._first.value > 0:
 
                 tsdb_history = await storage.read_ohlcv(
                     fqsn,
-                    end=tsdb_last_frame_start,
                     timeframe=timeframe,
+                    end=tsdb_last_frame_start,
                 )
 
                 # empty query
@@ -930,6 +942,8 @@ async def allocate_persistent_feed(
     some_data_ready = trio.Event()
     feed_is_live = trio.Event()
 
+    symstr = symstr.lower()
+
     # establish broker backend quote stream by calling
     # ``stream_quotes()``, which is a required broker backend endpoint.
     init_msg, first_quote = await bus.nursery.start(
@@ -1130,6 +1144,10 @@ async def open_feed_bus(
     flumes: dict[str, Flume] = {}
 
     for symbol in symbols:
+
+        # we always use lower case keys internally
+        symbol = symbol.lower()
+
         # if no cached feed for this symbol has been created for this
         # brokerd yet, start persistent stream and shm writer task in
         # service nursery