{
  "name": "Web site scraper for LLMs with Airtop",
  "nodes": [
    {
      "id": "aaa5bf43-bf76-4433-93c7-7e168f2e140c",
      "name": "On form submission",
      "type": "n8n-nodes-base.formTrigger",
      "position": [
        -400,
        -200
      ]
    },
    {
      "id": "5f862a6f-8079-4d24-a2ae-7442c45c8f04",
      "name": "Info to upload into spreadsheet",
      "type": "n8n-nodes-base.set",
      "position": [
        260,
        -100
      ]
    },
    {
      "id": "556e5da4-4454-46cd-b17d-bd8695515670",
      "name": "Load info to spreadsheet",
      "type": "n8n-nodes-base.googleSheets",
      "position": [
        480,
        -100
      ]
    },
    {
      "id": "4592ada8-8f21-4117-a0ee-8906922f5685",
      "name": "Scrape webpage",
      "type": "n8n-nodes-base.airtop",
      "position": [
        700,
        -100
      ]
    },
    {
      "id": "74a1399d-a335-455c-a3dd-167624b4a5f2",
      "name": "Create Google Docs",
      "type": "n8n-nodes-base.googleDocs",
      "position": [
        920,
        -100
      ]
    },
    {
      "id": "1e69e40f-0465-430b-85ec-c6a71e1cb4a4",
      "name": "Write scraped content",
      "type": "n8n-nodes-base.googleDocs",
      "position": [
        1140,
        -100
      ]
    },
    {
      "id": "49e9b38c-c2d4-49f3-bf3f-531b10257db4",
      "name": "Should scrape more?",
      "type": "n8n-nodes-base.if",
      "position": [
        1380,
        -100
      ]
    },
    {
      "id": "239b2a02-bd66-4805-8ad3-b4ef6daa5e60",
      "name": "Read scraped webpages",
      "type": "n8n-nodes-base.googleSheets",
      "position": [
        1580,
        -175
      ]
    },
    {
      "id": "029d7de1-9043-424e-9c22-aed436436e6a",
      "name": "Retrieve links to scrape",
      "type": "n8n-nodes-base.airtop",
      "position": [
        1800,
        -175
      ]
    },
    {
      "id": "57f2a3ee-40c9-4e72-99f4-739fff04667a",
      "name": "Filter links to insert to Sheets",
      "type": "n8n-nodes-base.code",
      "position": [
        2020,
        -180
      ]
    },
    {
      "id": "6a0f20e8-df45-4799-8162-21d427e19e49",
      "name": "Insert new links",
      "type": "n8n-nodes-base.googleSheets",
      "position": [
        2240,
        -175
      ]
    },
    {
      "id": "9433ab9e-b562-4aa3-a311-ae2a355ce774",
      "name": "Scrape webpage1",
      "type": "n8n-nodes-base.airtop",
      "position": [
        2460,
        -175
      ]
    },
    {
      "id": "704c25cf-b690-492d-a759-7b24a870edf4",
      "name": "Update with new scraped content",
      "type": "n8n-nodes-base.googleDocs",
      "position": [
        2680,
        -175
      ]
    },
    {
      "id": "14f1465d-3d7d-4b7a-87d2-2552b9514e37",
      "name": "Flag scraped link",
      "type": "n8n-nodes-base.set",
      "position": [
        2900,
        -175
      ]
    },
    {
      "id": "364513e7-39c0-47af-83bd-475ffb0ae2a0",
      "name": "Insert flag",
      "type": "n8n-nodes-base.googleSheets",
      "position": [
        3120,
        -100
      ]
    },
    {
      "id": "bd799398-e6c6-4cd4-a8a9-d189acabb194",
      "name": "Sticky Note",
      "type": "n8n-nodes-base.stickyNote",
      "position": [
        -20,
        -220
      ],
      "parameters": {
        "width": 660,
        "height": 280,
        "content": "## Create Spreadsheet\nCreate a spreadsheet to track all the links found on the webpage."
      }
    },
    {
      "id": "7a5aeddd-9c0f-4ea2-8452-35dd14e6963a",
      "name": "Create Spreadsheet",
      "type": "n8n-nodes-base.googleSheets",
      "position": [
        40,
        -100
      ]
    },
    {
      "id": "19a6c42b-8be5-4599-bc60-99f0b09e3623",
      "name": "Sticky Note1",
      "type": "n8n-nodes-base.stickyNote",
      "position": [
        660,
        -220
      ],
      "parameters": {
        "width": 180,
        "height": 280,
        "content": "## Scrape webpage"
      }
    },
    {
      "id": "5de141f9-9e1e-4159-8611-06d23ee7b476",
      "name": "Sticky Note2",
      "type": "n8n-nodes-base.stickyNote",
      "position": [
        860,
        -220
      ],
      "parameters": {
        "width": 440,
        "height": 280,
        "content": "## Create Doc\nCreate a document to store all the information scraped from the webpage."
      }
    },
    {
      "id": "a9108190-01d9-4c9a-a7b0-9ea582026acb",
      "name": "Sticky Note3",
      "type": "n8n-nodes-base.stickyNote",
      "position": [
        1320,
        -280
      ],
      "parameters": {
        "width": 1980,
        "height": 380,
        "content": "## Recursive Scraping Process\nReads a list of URLs from a Google Sheet, scrapes each page, saves the content to a doc, and adds new links back to the sheet. Repeats the process for each depth level, u"
      }
    },
    {
      "id": "bec8b412-51e1-45df-95b5-ae4e4aeb1fc2",
      "name": "Sticky Note4",
      "type": "n8n-nodes-base.stickyNote",
      "position": [
        -1160,
        -540
      ],
      "parameters": {
        "width": 660,
        "height": 940,
        "content": "README\n# Recursive Web Scraping \n\n## Use Case  \nAutomating web scraping with recursive depth is ideal for collecting content across multiple linked pages—perfect for content aggregation, lead generati"
      }
    },
    {
      "id": "ef3cfdbf-bd61-4452-bad0-d0154bbd893b",
      "name": "When Executed by Another Workflow",
      "type": "n8n-nodes-base.executeWorkflowTrigger",
      "position": [
        -400,
        0
      ]
    },
    {
      "id": "16d54958-18b6-497f-af9e-5953a39ae0bb",
      "name": "Sticky Note5",
      "type": "n8n-nodes-base.stickyNote",
      "position": [
        -440,
        -300
      ],
      "parameters": {
        "width": 400,
        "height": 460,
        "content": "## Input Parameters\nRun this workflow using a form or from another workflow."
      }
    },
    {
      "id": "99cc34ec-37eb-423a-9cc5-7c1b7736d352",
      "name": "Unify params",
      "type": "n8n-nodes-base.set",
      "position": [
        -180,
        -100
      ]
    }
  ],
  "connections": {
    "Insert flag": {
      "main": [
        [
          {
            "node": "Should scrape more?",
            "type": "main",
            "index": 0
          }
        ]
      ]
    },
    "Unify params": {
      "main": [
        [
          {
            "node": "Create Spreadsheet",
            "type": "main",
            "index": 0
          }
        ]
      ]
    },
    "Scrape webpage": {
      "main": [
        [
          {
            "node": "Create Google Docs",
            "type": "main",
            "index": 0
          }
        ]
      ]
    },
    "Scrape webpage1": {
      "main": [
        [
          {
            "node": "Update with new scraped content",
            "type": "main",
            "index": 0
          }
        ]
      ]
    },
    "Insert new links": {
      "main": [
        [
          {
            "node": "Scrape webpage1",
            "type": "main",
            "index": 0
          }
        ]
      ]
    },
    "Flag scraped link": {
      "main": [
        [
          {
            "node": "Insert flag",
            "type": "main",
            "index": 0
          }
        ]
      ]
    },
    "Create Google Docs": {
      "main": [
        [
          {
            "node": "Write scraped content",
            "type": "main",
            "index": 0
          }
        ]
      ]
    },
    "Create Spreadsheet": {
      "main": [
        [
          {
            "node": "Info to upload into spreadsheet",
            "type": "main",
            "index": 0
          }
        ]
      ]
    },
    "On form submission": {
      "main": [
        [
          {
            "node": "Unify params",
            "type": "main",
            "index": 0
          }
        ]
      ]
    },
    "Should scrape more?": {
      "main": [
        [
          {
            "node": "Read scraped webpages",
            "type": "main",
            "index": 0
          }
        ],
        []
      ]
    },
    "Read scraped webpages": {
      "main": [
        [
          {
            "node": "Retrieve links to scrape",
            "type": "main",
            "index": 0
          }
        ]
      ]
    },
    "Write scraped content": {
      "main": [
        [
          {
            "node": "Should scrape more?",
            "type": "main",
            "index": 0
          }
        ]
      ]
    },
    "Load info to spreadsheet": {
      "main": [
        [
          {
            "node": "Scrape webpage",
            "type": "main",
            "index": 0
          }
        ]
      ]
    },
    "Retrieve links to scrape": {
      "main": [
        [
          {
            "node": "Filter links to insert to Sheets",
            "type": "main",
            "index": 0
          }
        ]
      ]
    },
    "Info to upload into spreadsheet": {
      "main": [
        [
          {
            "node": "Load info to spreadsheet",
            "type": "main",
            "index": 0
          }
        ]
      ]
    },
    "Update with new scraped content": {
      "main": [
        [
          {
            "node": "Flag scraped link",
            "type": "main",
            "index": 0
          }
        ]
      ]
    },
    "Filter links to insert to Sheets": {
      "main": [
        [
          {
            "node": "Insert new links",
            "type": "main",
            "index": 0
          }
        ]
      ]
    },
    "When Executed by Another Workflow": {
      "main": [
        [
          {
            "node": "Unify params",
            "type": "main",
            "index": 0
          }
        ]
      ]
    }
  }
}