{
  "name": "Scrape and summarize posts of a news site without RSS feed using AI and save them to a NocoDB",
  "nodes": [
    {
      "id": "97711d12-20de-40aa-b994-d2b10f20a5e5",
      "name": "Extract the HTML with the right css class",
      "type": "n8n-nodes-base.html",
      "position": [
        -500,
        0
      ]
    },
    {
      "id": "b874b570-daae-4878-b525-07ac30756eb1",
      "name": "Summary",
      "type": "n8n-nodes-base.openAi",
      "position": [
        -880,
        440
      ]
    },
    {
      "id": "72696278-2d44-4073-936a-6fe9df1bc7d8",
      "name": "Keywords",
      "type": "n8n-nodes-base.openAi",
      "position": [
        -880,
        620
      ]
    },
    {
      "id": "0bfdb3be-76ef-4bb3-902f-f0869342b83c",
      "name": "Rename keywords",
      "type": "n8n-nodes-base.set",
      "position": [
        -700,
        620
      ]
    },
    {
      "id": "0387cf34-41c9-4729-8570-1db7b17c42f4",
      "name": "Rename Summary",
      "type": "n8n-nodes-base.set",
      "position": [
        -700,
        440
      ]
    },
    {
      "id": "5fa1702c-f0bf-4524-bc8f-6f550dd83f1e",
      "name": "Merge",
      "type": "n8n-nodes-base.merge",
      "position": [
        -480,
        560
      ]
    },
    {
      "id": "25128a71-b0d5-49a4-adb8-c3fbe03c0a85",
      "name": "Extract date",
      "type": "n8n-nodes-base.html",
      "position": [
        -500,
        -160
      ]
    },
    {
      "id": "138b3bd6-494a-49b9-b5b8-c9febcfef9fb",
      "name": "Select posts of last 7 days",
      "type": "n8n-nodes-base.code",
      "position": [
        120,
        0
      ]
    },
    {
      "id": "1ace953b-e298-4fc2-8970-327f736889ec",
      "name": "Merge date & links",
      "type": "n8n-nodes-base.merge",
      "position": [
        -100,
        0
      ]
    },
    {
      "id": "bba692fc-c225-41be-a969-179d8b99c071",
      "name": "HTTP Request1",
      "type": "n8n-nodes-base.httpRequest",
      "position": [
        320,
        0
      ]
    },
    {
      "id": "26671065-631f-4684-9ee1-15f26b4cf1e4",
      "name": "Merge Content with Date & Link",
      "type": "n8n-nodes-base.merge",
      "position": [
        500,
        260
      ]
    },
    {
      "id": "79beb744-97b8-4072-824a-6736b0a080ef",
      "name": "Extract individual posts",
      "type": "n8n-nodes-base.html",
      "position": [
        500,
        0
      ]
    },
    {
      "id": "e89d9de5-875b-453e-825a-26f2bebcc8df",
      "name": "Sticky Note",
      "type": "n8n-nodes-base.stickyNote",
      "position": [
        80,
        -107
      ],
      "parameters": {
        "width": 180.9747474601832,
        "height": 276.31054308676767,
        "content": "Select only the newest news: todays date going back xy days"
      }
    },
    {
      "id": "8a603f2f-4208-48c7-b169-e5613f13fa7d",
      "name": "Merge ChatGPT output with Date & Link",
      "type": "n8n-nodes-base.merge",
      "position": [
        -180,
        560
      ]
    },
    {
      "id": "e1036421-9ce1-4121-a692-602410ec7c95",
      "name": "Sticky Note1",
      "type": "n8n-nodes-base.stickyNote",
      "position": [
        -539.7802584556148,
        -4.722020203185366
      ],
      "parameters": {
        "width": 182.2748213508401,
        "height": 304.2550759710132,
        "content": "\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\nExtracting the individual links of the press release page in order to retrieve the individual posts on their respective **url**"
      }
    },
    {
      "id": "3655ab22-6a17-429a-9d9b-d96bbcc78fee",
      "name": "Sticky Note2",
      "type": "n8n-nodes-base.stickyNote",
      "position": [
        -538.404803912782,
        -304
      ],
      "parameters": {
        "width": 178.75185894039254,
        "height": 289.463147786618,
        "content": "Extracting the dates of the posts of the press release page.\nThe right CSS selector has to be chosen.\n[More info on datagrab.io](https://datagrab.io/blog/guide-to-css-selectors-for-web-scraping/)"
      }
    },
    {
      "id": "2e27fb4c-426a-41e1-b5fb-9b2d78acd2a7",
      "name": "Sticky Note3",
      "type": "n8n-nodes-base.stickyNote",
      "position": [
        -1300,
        -299.82161760751774
      ],
      "parameters": {
        "width": 334.4404040637068,
        "height": 1127.2017245821128,
        "content": "# Scraping posts of a news site without RSS feed\n\n\nThe [News Site](https://www.colt.net/resources/type/news/) from Colt, a telecom company, does not offer an RSS feed, therefore web scraping is the \nc"
      }
    },
    {
      "id": "d43bd5b7-2aff-4a07-8aca-ca4747ec6c4d",
      "name": "Sticky Note4",
      "type": "n8n-nodes-base.stickyNote",
      "position": [
        -927.8447474890202,
        -80
      ],
      "parameters": {
        "width": 153.90180146729315,
        "height": 237.91333335255808,
        "content": "Weekly cron job"
      }
    },
    {
      "id": "e732d136-fcf1-4fc3-8bb6-bdcea3c78d9e",
      "name": "Sticky Note5",
      "type": "n8n-nodes-base.stickyNote",
      "position": [
        -760,
        -80
      ],
      "parameters": {
        "width": 185.41515152389002,
        "height": 241.454848504947,
        "content": "The html of the news site is being retrieved: https://www.colt.net/resources/type/news/"
      }
    },
    {
      "id": "d5e29ec3-5ef2-42f3-b316-9350644dbba4",
      "name": "Sticky Note6",
      "type": "n8n-nodes-base.stickyNote",
      "position": [
        -340,
        -306
      ],
      "parameters": {
        "width": 187.3613302133812,
        "height": 469.2923233086395,
        "content": "As the extraction are returned as arrays, they transformed into individual JSON items to enable looping with other nodes"
      }
    },
    {
      "id": "1af15c45-32c0-4abf-a35d-be7206823569",
      "name": "Sticky Note7",
      "type": "n8n-nodes-base.stickyNote",
      "position": [
        -120,
        -103.54151515238902
      ],
      "parameters": {
        "width": 150,
        "height": 274.50898992724416,
        "content": "The links of the individual posts and the dates of the posts "
      }
    },
    {
      "id": "f7c42748-f227-42d0-a9e2-fcb16dbd0f75",
      "name": "Retrieve the web page for further processsing",
      "type": "n8n-nodes-base.httpRequest",
      "position": [
        -720,
        0
      ]
    },
    {
      "id": "b2c36f26-8221-478f-a4b0-22758b1e5e58",
      "name": "Sticky Note9",
      "type": "n8n-nodes-base.stickyNote",
      "position": [
        292,
        -100
      ],
      "parameters": {
        "width": 155.0036363426638,
        "height": 272.1479798256519,
        "content": "Get the html of each individual **newest** post"
      }
    },
    {
      "id": "6ae05c31-c09a-4b4e-a013-41571937bc39",
      "name": "Sticky Note10",
      "type": "n8n-nodes-base.stickyNote",
      "position": [
        460,
        -100
      ],
      "parameters": {
        "width": 184.07417896879767,
        "height": 269.2504410842093,
        "content": "Extracting the title & content (text) of each individual news post with the right CSS selector"
      }
    },
    {
      "id": "e2da76d4-0c8c-4c61-924f-50aa9387e9ab",
      "name": "Sticky Note11",
      "type": "n8n-nodes-base.stickyNote",
      "position": [
        460,
        180
      ],
      "parameters": {
        "width": 191.87778190338406,
        "height": 234.13422787857044,
        "content": "Merge link to url, date with content (text) and title of each news psot"
      }
    },
    {
      "id": "c124aaac-dce6-4658-9027-bdfe5c0c81e6",
      "name": "Sticky Note12",
      "type": "n8n-nodes-base.stickyNote",
      "position": [
        -907.2264215202996,
        331.0681740778203
      ],
      "parameters": {
        "width": 150,
        "height": 256.2444361932317,
        "content": "Create a summary of each news post with ChatGPT. You need a ChatGPT API account for this"
      }
    },
    {
      "id": "c9037e74-007b-4e44-b7f9-90e78b853eb5",
      "name": "Sticky Note13",
      "type": "n8n-nodes-base.stickyNote",
      "position": [
        -909.595196087218,
        610.7495589157902
      ],
      "parameters": {
        "width": 152.85976723045226,
        "height": 218.52702200939785,
        "content": "\n\n\n\n\n\n\n\n\n\n\n\n\n\nGet the 3 keywords of each news post"
      }
    },
    {
      "id": "756397d9-de80-4114-9dee-b4f4b9593333",
      "name": "Sticky Note14",
      "type": "n8n-nodes-base.stickyNote",
      "position": [
        -740,
        340
      ],
      "parameters": {
        "width": 182.7735784797001,
        "height": 489.05192374172555,
        "content": "Just a renaming of data fields and eliminating unnecessary ones"
      }
    },
    {
      "id": "a0dcb254-f064-45ed-8e22-30a6d079085b",
      "name": "Sticky Note15",
      "type": "n8n-nodes-base.stickyNote",
      "position": [
        -520,
        480
      ],
      "parameters": {
        "width": 169.7675735887227,
        "height": 254.94383570413422,
        "content": "Merge summary and keywords of each news post"
      }
    },
    {
      "id": "82993166-b273-4b82-a954-554c6892f825",
      "name": "Schedule Trigger each week",
      "type": "n8n-nodes-base.scheduleTrigger",
      "position": [
        -900,
        0
      ]
    },
    {
      "id": "3d670eb9-5a36-4cd9-8d2c-40adf848485e",
      "name": "Sticky Note16",
      "type": "n8n-nodes-base.stickyNote",
      "position": [
        -220,
        477.5081090810816
      ],
      "parameters": {
        "width": 180.1723775015045,
        "height": 260.5279202647822,
        "content": "Add title, link and date to summary and keywords of each news post"
      }
    },
    {
      "id": "62021393-e988-4834-9fa2-75a957b42890",
      "name": "NocoDB news database",
      "type": "n8n-nodes-base.nocoDb",
      "position": [
        60,
        560
      ]
    },
    {
      "id": "e59e9fab-10a7-470b-afa6-e1d4b4e57723",
      "name": "Sticky Note17",
      "type": "n8n-nodes-base.stickyNote",
      "position": [
        280,
        480
      ],
      "parameters": {
        "width": 483.95825869942666,
        "height": 268.5678114630957,
        "content": "## News summaries and keywords &rarr; database\n\n[NocoDB](https://nocodb.com/) is an SQL database, here we store the news summaries and keywords for further processing. Any other output target can be c"
      }
    },
    {
      "id": "253b414b-9a5b-4a25-892b-9aa011d55d28",
      "name": "Sticky Note18",
      "type": "n8n-nodes-base.stickyNote",
      "position": [
        20,
        480
      ],
      "parameters": {
        "width": 262.99083066277313,
        "height": 268.56781146309544,
        "content": ""
      }
    },
    {
      "id": "438e8dde-ce0a-4e5e-8d62-d735d19ec189",
      "name": "Create single link items",
      "type": "n8n-nodes-base.itemLists",
      "position": [
        -300,
        0
      ]
    },
    {
      "id": "d721776b-fefc-4e72-91ef-6710f10b0393",
      "name": "Create single date items",
      "type": "n8n-nodes-base.itemLists",
      "position": [
        -300,
        -160
      ]
    }
  ],
  "connections": {
    "Merge": {
      "main": [
        [
          {
            "node": "Merge ChatGPT output with Date & Link",
            "type": "main",
            "index": 1
          }
        ]
      ]
    },
    "Summary": {
      "main": [
        [
          {
            "node": "Rename Summary",
            "type": "main",
            "index": 0
          }
        ]
      ]
    },
    "Keywords": {
      "main": [
        [
          {
            "node": "Rename keywords",
            "type": "main",
            "index": 0
          }
        ]
      ]
    },
    "Extract date": {
      "main": [
        [
          {
            "node": "Create single date items",
            "type": "main",
            "index": 0
          }
        ]
      ]
    },
    "HTTP Request1": {
      "main": [
        [
          {
            "node": "Extract individual posts",
            "type": "main",
            "index": 0
          }
        ]
      ]
    },
    "Rename Summary": {
      "main": [
        [
          {
            "node": "Merge",
            "type": "main",
            "index": 0
          }
        ]
      ]
    },
    "Rename keywords": {
      "main": [
        [
          {
            "node": "Merge",
            "type": "main",
            "index": 1
          }
        ]
      ]
    },
    "Merge date & links": {
      "main": [
        [
          {
            "node": "Select posts of last 7 days",
            "type": "main",
            "index": 0
          }
        ]
      ]
    },
    "Create single date items": {
      "main": [
        [
          {
            "node": "Merge date & links",
            "type": "main",
            "index": 0
          }
        ]
      ]
    },
    "Create single link items": {
      "main": [
        [
          {
            "node": "Merge date & links",
            "type": "main",
            "index": 1
          }
        ]
      ]
    },
    "Extract individual posts": {
      "main": [
        [
          {
            "node": "Merge Content with Date & Link",
            "type": "main",
            "index": 0
          }
        ]
      ]
    },
    "Schedule Trigger each week": {
      "main": [
        [
          {
            "node": "Retrieve the web page for further processsing",
            "type": "main",
            "index": 0
          }
        ]
      ]
    },
    "Select posts of last 7 days": {
      "main": [
        [
          {
            "node": "Merge Content with Date & Link",
            "type": "main",
            "index": 1
          },
          {
            "node": "HTTP Request1",
            "type": "main",
            "index": 0
          }
        ]
      ]
    },
    "Merge Content with Date & Link": {
      "main": [
        [
          {
            "node": "Summary",
            "type": "main",
            "index": 0
          },
          {
            "node": "Keywords",
            "type": "main",
            "index": 0
          },
          {
            "node": "Merge ChatGPT output with Date & Link",
            "type": "main",
            "index": 0
          }
        ]
      ]
    },
    "Merge ChatGPT output with Date & Link": {
      "main": [
        [
          {
            "node": "NocoDB news database",
            "type": "main",
            "index": 0
          }
        ]
      ]
    },
    "Extract the HTML with the right css class": {
      "main": [
        [
          {
            "node": "Create single link items",
            "type": "main",
            "index": 0
          }
        ]
      ]
    },
    "Retrieve the web page for further processsing": {
      "main": [
        [
          {
            "node": "Extract the HTML with the right css class",
            "type": "main",
            "index": 0
          },
          {
            "node": "Extract date",
            "type": "main",
            "index": 0
          }
        ]
      ]
    }
  }
}