{
  "id": "bbg-p0188-data-pipelines-and-analytics-data",
  "title": "Data Lake Architecture Flow",
  "chapter": "data-pipelines-and-analytics",
  "batch": "04",
  "rank": 38,
  "sourcePage": 188,
  "sourcePointer": "p. 188",
  "status": "accepted",
  "reviewerStatus": "reviewed",
  "fidelityScore": 0.9,
  "canvas": {
    "width": 960,
    "height": 640
  },
  "fireworksTechGraph": {
    "style": "style-1-flat-icon",
    "diagramType": "data-flow",
    "topologyNotes": [
      "source page render inspected",
      "extracted page text inspected",
      "preserve data-lake topology: diverse sources, batch/stream ingestion, raw zone, processing/catalog, curated analytics, and downstream consumption",
      "omit vendor logos, screenshots, source branding, and long bullet prose"
    ],
    "publicBoundary": [
      "original vector output",
      "no source pixels",
      "no source mark or long wording"
    ]
  },
  "callouts": [],
  "sourceReview": {
    "conceptAnchors": [
      "concept: diverse data sources",
      "concept: batch and streaming ingestion",
      "concept: raw lake storage",
      "concept: processing and catalog",
      "concept: analytics consumers"
    ],
    "labelSource": "curated",
    "semanticStatus": "reviewed"
  },
  "groups": [
    {
      "id": "sources",
      "label": "Data sources",
      "x": 52,
      "y": 120,
      "w": 180,
      "h": 340
    },
    {
      "id": "ingest",
      "label": "Ingestion",
      "x": 284,
      "y": 120,
      "w": 170,
      "h": 340
    },
    {
      "id": "lake",
      "label": "Lake zones",
      "x": 506,
      "y": 120,
      "w": 182,
      "h": 340
    },
    {
      "id": "consume",
      "label": "Consumption",
      "x": 740,
      "y": 120,
      "w": 168,
      "h": 340
    }
  ],
  "shapes": [
    {
      "id": "files",
      "kind": "rect",
      "label": "Files DB APIs",
      "detail": "structured mixed",
      "x": 86,
      "y": 152,
      "w": 112,
      "h": 58,
      "tone": "blue"
    },
    {
      "id": "media",
      "kind": "rect",
      "label": "Media sensors",
      "detail": "unstructured",
      "x": 86,
      "y": 270,
      "w": 112,
      "h": 58,
      "tone": "teal"
    },
    {
      "id": "stream",
      "kind": "queue",
      "label": "Stream ingest",
      "detail": "real time",
      "x": 310,
      "y": 168,
      "w": 118,
      "h": 54,
      "tone": "purple"
    },
    {
      "id": "batch",
      "kind": "rect",
      "label": "Batch jobs",
      "detail": "scheduled",
      "x": 310,
      "y": 300,
      "w": 118,
      "h": 58,
      "tone": "orange"
    },
    {
      "id": "raw",
      "kind": "cylinder",
      "label": "Raw zone",
      "detail": "original data",
      "x": 538,
      "y": 150,
      "w": 118,
      "h": 76,
      "tone": "green"
    },
    {
      "id": "process",
      "kind": "hex",
      "label": "Processing",
      "detail": "clean transform",
      "x": 536,
      "y": 270,
      "w": 122,
      "h": 78,
      "tone": "purple"
    },
    {
      "id": "catalog",
      "kind": "rect",
      "label": "Catalog",
      "detail": "metadata",
      "x": 538,
      "y": 382,
      "w": 118,
      "h": 54,
      "tone": "gray"
    },
    {
      "id": "warehouse",
      "kind": "cylinder",
      "label": "Curated store",
      "detail": "analytics",
      "x": 770,
      "y": 176,
      "w": 110,
      "h": 74,
      "tone": "teal"
    },
    {
      "id": "dashboard",
      "kind": "rect",
      "label": "Dashboards ML",
      "detail": "consume",
      "x": 770,
      "y": 334,
      "w": 110,
      "h": 58,
      "tone": "blue"
    }
  ],
  "connectors": [
    {
      "from": "files",
      "to": "batch",
      "label": "load",
      "flow": "main"
    },
    {
      "from": "media",
      "to": "stream",
      "label": "events",
      "flow": "async"
    },
    {
      "from": "stream",
      "to": "raw",
      "label": "append",
      "flow": "data"
    },
    {
      "from": "batch",
      "to": "raw",
      "label": "land",
      "flow": "data"
    },
    {
      "from": "raw",
      "to": "process",
      "label": "read",
      "flow": "main"
    },
    {
      "from": "process",
      "to": "catalog",
      "label": "register",
      "flow": "control"
    },
    {
      "from": "process",
      "to": "warehouse",
      "label": "publish",
      "flow": "data"
    },
    {
      "from": "warehouse",
      "to": "dashboard",
      "label": "serve",
      "flow": "data"
    },
    {
      "from": "catalog",
      "to": "dashboard",
      "label": "discover",
      "flow": "control",
      "dashed": true
    }
  ]
}
