{
  "name": "Segment PDFs by table of contents with Gemini AI and Chunkr.ai",
  "nodes": [
    {
      "id": "08ae2ea6-5ad1-4fdf-ac75-4e22811437cc",
      "name": "When clicking ‘Execute workflow’",
      "type": "n8n-nodes-base.manualTrigger",
      "position": [
        -1860,
        -220
      ]
    },
    {
      "id": "f81acfdb-2eae-4824-a4ec-2540ff15fa12",
      "name": "Status is:",
      "type": "n8n-nodes-base.switch",
      "position": [
        -40,
        20
      ]
    },
    {
      "id": "77949b9c-b3a1-4cd9-b643-d7f49dc64726",
      "name": "Google Gemini Chat Model",
      "type": "@n8n/n8n-nodes-langchain.lmChatGoogleGemini",
      "position": [
        1360,
        80
      ]
    },
    {
      "id": "1e53fcc8-4697-48c4-90cb-c07dee049949",
      "name": "Structured Output Parser",
      "type": "@n8n/n8n-nodes-langchain.outputParserStructured",
      "position": [
        1620,
        280
      ]
    },
    {
      "id": "ecccb5bf-6625-476e-b010-e50e3b89a80b",
      "name": "Auto-fixing Output Parser",
      "type": "@n8n/n8n-nodes-langchain.outputParserAutofixing",
      "position": [
        1480,
        80
      ]
    },
    {
      "id": "97b36f68-cd64-437b-8af0-dada28b40ea8",
      "name": "Set File Name",
      "type": "n8n-nodes-base.set",
      "position": [
        -980,
        20
      ]
    },
    {
      "id": "46c53d9b-9387-4415-9b8c-b01a12e391a3",
      "name": "When Executed by Another Workflow",
      "type": "n8n-nodes-base.executeWorkflowTrigger",
      "position": [
        -1860,
        300
      ]
    },
    {
      "id": "f2145258-4cb1-4339-81d4-f9dfe524b972",
      "name": "Extract Sections headers as fallback",
      "type": "n8n-nodes-base.code",
      "position": [
        880,
        -180
      ]
    },
    {
      "id": "47b749ec-4832-45a0-826e-13ef23fd4647",
      "name": "Take beginning of Document to look for Table of contents",
      "type": "n8n-nodes-base.code",
      "position": [
        280,
        -180
      ]
    },
    {
      "id": "7f136d22-2195-4d56-803a-a9f6384f3557",
      "name": "Stop and Error",
      "type": "n8n-nodes-base.stopAndError",
      "position": [
        200,
        220
      ]
    },
    {
      "id": "228fed4c-c2a9-4dde-a270-e674ae61b9da",
      "name": "Google Gemini Chat Model1",
      "type": "@n8n/n8n-nodes-langchain.lmChatGoogleGemini",
      "position": [
        1500,
        280
      ]
    },
    {
      "id": "9a21bf6c-208f-45a7-bb78-f27935b53b5d",
      "name": "Sticky Note",
      "type": "n8n-nodes-base.stickyNote",
      "position": [
        -2640,
        -240
      ],
      "parameters": {
        "width": 580,
        "height": 260,
        "content": "### Welcome to the Document Processing Workflow!\n\nThis workflow automates the extraction and structuring of content from PDF documents. It leverages Chunkr.ai for document parsing and an AI Agent to b"
      }
    },
    {
      "id": "447c7ec5-a094-4034-afc4-fcd7aae5f4de",
      "name": "Convert the PDF to base64",
      "type": "n8n-nodes-base.extractFromFile",
      "position": [
        -1180,
        20
      ]
    },
    {
      "id": "dfc84ad3-4a85-4641-bc16-1f89c54b1c3a",
      "name": "Sticky Note1",
      "type": "n8n-nodes-base.stickyNote",
      "position": [
        -1940,
        -460
      ],
      "parameters": {
        "width": 480,
        "height": 440,
        "content": "### Node: When clicking ‘Execute workflow’ (Manual Trigger)\n\nThis node allows you to manually start the workflow.\n\n**How to use:**\n1.  Simply click the \"Execute workflow\" button in the n8n interface.\n"
      }
    },
    {
      "id": "b61a3d2e-9773-4e03-ad49-380fca4bb04d",
      "name": "Sticky Note2",
      "type": "n8n-nodes-base.stickyNote",
      "position": [
        -1940,
        20
      ],
      "parameters": {
        "width": 480,
        "height": 460,
        "content": "### Node: When Executed by Another Workflow (Execute Workflow Trigger)\n\nThis node allows external workflows to trigger this process.\n\n**How to use:**\n1.  Ensure the connected workflow provides a `URL` variable"
      }
    },
    {
      "id": "0f1ddb9a-dc65-44b7-b5ed-b4f9fa64e743",
      "name": "Download PDF from URL",
      "type": "n8n-nodes-base.httpRequest",
      "position": [
        -1620,
        300
      ]
    },
    {
      "id": "b1095619-41f5-4381-9516-3c221ef98388",
      "name": "Download PDF from Google Drive",
      "type": "n8n-nodes-base.googleDrive",
      "position": [
        -1640,
        -220
      ]
    },
    {
      "id": "ceb2c044-a55c-4c9d-9736-6769cce0ed12",
      "name": "Sticky Note3",
      "type": "n8n-nodes-base.stickyNote",
      "position": [
        -1100,
        -500
      ],
      "parameters": {
        "width": 1000,
        "height": 400,
        "content": "### Node: POST Chunkr Task (HTTP Request) & GET Chunkr task\n\n### 🚨🚨🚨 INSERT A CHUNKR.AI API KEY HERE\n\n\nThis node sends your document to Chunkr.ai for processing.\n\n**How to use:**\n1.  **Authorization:*"
      }
    },
    {
      "id": "45dd3735-1c1a-4b46-ad10-d79234f01b7a",
      "name": "Wait Before Polling the Chunkr Result",
      "type": "n8n-nodes-base.wait",
      "position": [
        -480,
        20
      ]
    },
    {
      "id": "572c6ae1-cf9a-43ff-9c09-3bc650875d70",
      "name": "Sticky Note4",
      "type": "n8n-nodes-base.stickyNote",
      "position": [
        680,
        -600
      ],
      "parameters": {
        "width": 480,
        "height": 380,
        "content": "### Node: Extract Sections headers as fallback (Code)\n\nThis node extracts all detected section headers that were identified by Chunkr. Chunkr is not ideal at finding these but does a great job general"
      }
    },
    {
      "id": "56cbaf07-965c-4718-b282-dd1b471ffa90",
      "name": "Sticky Note5",
      "type": "n8n-nodes-base.stickyNote",
      "position": [
        120,
        -600
      ],
      "parameters": {
        "width": 500,
        "height": 380,
        "content": "### Node: Take beginning of Document to look for Table of contents (Code)\n\n### 🚨🚨🚨 INSERT A CHUNKR.AI API KEY HERE\n\n\nThis node extracts initial document content for AI analysis.\n\n**What it does:**\n* I"
      }
    },
    {
      "id": "2b103146-6917-4745-adff-cb790dbdd7a6",
      "name": "Return each section individually",
      "type": "n8n-nodes-base.code",
      "position": [
        2040,
        -400
      ]
    },
    {
      "id": "8622fab4-edcc-41d4-8456-0c652e8f6eb2",
      "name": "Table of Content Agent",
      "type": "@n8n/n8n-nodes-langchain.agent",
      "position": [
        1420,
        -160
      ]
    },
    {
      "id": "b627fe7d-f342-40f6-912f-c090a619c96c",
      "name": "Return the whole document",
      "type": "n8n-nodes-base.code",
      "position": [
        2120,
        340
      ]
    },
    {
      "id": "14257369-a732-45cc-a45d-16408f9408d7",
      "name": "Create HTML document",
      "type": "n8n-nodes-base.code",
      "position": [
        2580,
        140
      ]
    },
    {
      "id": "3c472dcc-492b-4eb1-8c4d-bef5565047ba",
      "name": "HTML",
      "type": "n8n-nodes-base.html",
      "position": [
        2780,
        140
      ]
    },
    {
      "id": "b912a32d-ae45-40f7-a5a4-e180cf306c6e",
      "name": "Move Binary Data",
      "type": "n8n-nodes-base.moveBinaryData",
      "position": [
        3000,
        140
      ]
    },
    {
      "id": "10b96111-14e2-4061-82ae-643fb243894d",
      "name": "Create Markdown Document",
      "type": "n8n-nodes-base.code",
      "position": [
        2600,
        500
      ]
    },
    {
      "id": "8eeb8129-e5e5-434f-b33a-35c7ea465a6d",
      "name": "Convert to File",
      "type": "n8n-nodes-base.convertToFile",
      "position": [
        2780,
        500
      ]
    },
    {
      "id": "f50391c5-8ebc-48a6-a9d1-4c0d835ff5ea",
      "name": "Sticky Note6",
      "type": "n8n-nodes-base.stickyNote",
      "position": [
        1260,
        -600
      ],
      "parameters": {
        "width": 560,
        "height": 400,
        "content": "### Node: Table of Content Agent (AI Agent)\n\nThis is where the Table of Contents is intelligently constructed.\n\n**What it does:**\n* It combines information from two sources:\n    * The raw section headers (from \"Ext"
      }
    },
    {
      "id": "c03c377b-6204-40e6-8079-56c726e8f8a8",
      "name": "Sticky Note7",
      "type": "n8n-nodes-base.stickyNote",
      "position": [
        1940,
        -640
      ],
      "parameters": {
        "width": 400,
        "height": 400,
        "content": "## Return each section individually\n\nYou can return each heading + section individually if you need to process each chapter one by one. The current configuration outputs three versions for the s"
      }
    },
    {
      "id": "2b6e50ac-702a-4ff9-86cf-7fbde7a58dba",
      "name": "Sticky Note8",
      "type": "n8n-nodes-base.stickyNote",
      "position": [
        1980,
        60
      ],
      "parameters": {
        "width": 400,
        "height": 400,
        "content": "## ... OR Return the Document as a whole\n\nIf you want to process the document as a whole you can choose this path. You can download the Markdown or HTML versions of the input PDF.\n\nIf you use trigger"
      }
    },
    {
      "id": "c4ec2654-ed9d-4bc3-a1a4-3e8f44aa115d",
      "name": "Sticky Note9",
      "type": "n8n-nodes-base.stickyNote",
      "position": [
        -2600,
        -820
      ],
      "parameters": {
        "width": 1540,
        "height": 260,
        "content": "# Convert PDFs to structured JSON with the correct subheading hierarchy\n\n## This workflow finds the actual headings of each section, the corresponding text, and outputs each section as an indi"
      }
    },
    {
      "id": "a887cdad-a2c3-4477-a6ed-72d007f560a2",
      "name": "GET Chunkr Task",
      "type": "n8n-nodes-base.httpRequest",
      "position": [
        -260,
        -60
      ]
    },
    {
      "id": "cd52c24f-1698-4887-8d5d-248eb0a904fd",
      "name": "POST Chunkr Task",
      "type": "n8n-nodes-base.httpRequest",
      "position": [
        -700,
        20
      ]
    },
    {
      "id": "5350ae70-85e1-44d2-bdb1-47b7a02708e5",
      "name": "Merge",
      "type": "n8n-nodes-base.merge",
      "position": [
        -1380,
        20
      ]
    }
  ],
  "connections": {
    "HTML": {
      "main": [
        [
          {
            "node": "Move Binary Data",
            "type": "main",
            "index": 0
          }
        ]
      ]
    },
    "Merge": {
      "main": [
        [
          {
            "node": "Convert the PDF to base64",
            "type": "main",
            "index": 0
          }
        ]
      ]
    },
    "Status is:": {
      "main": [
        [
          {
            "node": "Take beginning of Document to look for Table of contents",
            "type": "main",
            "index": 0
          }
        ],
        [
          {
            "node": "Wait Before Polling the Chunkr Result",
            "type": "main",
            "index": 0
          }
        ],
        [
          {
            "node": "Stop and Error",
            "type": "main",
            "index": 0
          }
        ]
      ]
    },
    "Set File Name": {
      "main": [
        [
          {
            "node": "POST Chunkr Task",
            "type": "main",
            "index": 0
          }
        ]
      ]
    },
    "GET Chunkr Task": {
      "main": [
        [
          {
            "node": "Status is:",
            "type": "main",
            "index": 0
          }
        ]
      ]
    },
    "POST Chunkr Task": {
      "main": [
        [
          {
            "node": "Wait Before Polling the Chunkr Result",
            "type": "main",
            "index": 0
          }
        ]
      ]
    },
    "Create HTML document": {
      "main": [
        [
          {
            "node": "HTML",
            "type": "main",
            "index": 0
          }
        ]
      ]
    },
    "Download PDF from URL": {
      "main": [
        [
          {
            "node": "Merge",
            "type": "main",
            "index": 1
          }
        ]
      ]
    },
    "Table of Content Agent": {
      "main": [
        [
          {
            "node": "Return each section individually",
            "type": "main",
            "index": 0
          },
          {
            "node": "Return the whole document",
            "type": "main",
            "index": 0
          }
        ]
      ]
    },
    "Create Markdown Document": {
      "main": [
        [
          {
            "node": "Convert to File",
            "type": "main",
            "index": 0
          }
        ]
      ]
    },
    "Google Gemini Chat Model": {
      "ai_languageModel": [
        [
          {
            "node": "Table of Content Agent",
            "type": "ai_languageModel",
            "index": 0
          }
        ]
      ]
    },
    "Structured Output Parser": {
      "ai_outputParser": [
        [
          {
            "node": "Auto-fixing Output Parser",
            "type": "ai_outputParser",
            "index": 0
          }
        ]
      ]
    },
    "Auto-fixing Output Parser": {
      "ai_outputParser": [
        [
          {
            "node": "Table of Content Agent",
            "type": "ai_outputParser",
            "index": 0
          }
        ]
      ]
    },
    "Convert the PDF to base64": {
      "main": [
        [
          {
            "node": "Set File Name",
            "type": "main",
            "index": 0
          }
        ]
      ]
    },
    "Google Gemini Chat Model1": {
      "ai_languageModel": [
        [
          {
            "node": "Auto-fixing Output Parser",
            "type": "ai_languageModel",
            "index": 0
          }
        ]
      ]
    },
    "Return the whole document": {
      "main": [
        [
          {
            "node": "Create HTML document",
            "type": "main",
            "index": 0
          },
          {
            "node": "Create Markdown Document",
            "type": "main",
            "index": 0
          }
        ]
      ]
    },
    "Download PDF from Google Drive": {
      "main": [
        [
          {
            "node": "Merge",
            "type": "main",
            "index": 0
          }
        ]
      ]
    },
    "Return each section individually": {
      "main": [
        []
      ]
    },
    "When Executed by Another Workflow": {
      "main": [
        [
          {
            "node": "Download PDF from URL",
            "type": "main",
            "index": 0
          }
        ]
      ]
    },
    "Extract Sections headers as fallback": {
      "main": [
        [
          {
            "node": "Table of Content Agent",
            "type": "main",
            "index": 0
          }
        ]
      ]
    },
    "When clicking ‘Execute workflow’": {
      "main": [
        [
          {
            "node": "Download PDF from Google Drive",
            "type": "main",
            "index": 0
          }
        ]
      ]
    },
    "Wait Before Polling the Chunkr Result": {
      "main": [
        [
          {
            "node": "GET Chunkr Task",
            "type": "main",
            "index": 0
          }
        ]
      ]
    },
    "Take beginning of Document to look for Table of contents": {
      "main": [
        [
          {
            "node": "Extract Sections headers as fallback",
            "type": "main",
            "index": 0
          }
        ]
      ]
    }
  }
}