// Scraper for official correspondence (dispatch) listings published by the
// Department of Local Administration on the ladsawai.go.th site.
//
// For every listing page it writes `page-<n>.json` into the output directory,
// and finally writes a de-duplicated `all.json` with every item collected.
const { execFileSync } = require("child_process");
const cheerio = require("cheerio");
const fs = require("fs");
const path = require("path");

const BASE = "https://ladsawai.go.th";
// Scheme of BASE ("https:"), used to complete protocol-relative links.
const BASE_PROTOCOL = new URL(BASE).protocol;
const OUT = path.join(process.cwd(), "หนังสือราชการจากท้องถิ่นจังหวัด");

// Menu id embedded in the listing URL; change it to match the target menu.
const MENU_ID = 1243;
// Last page number, taken from the site's pagination (it goes up to 1231).
const TOTAL_PAGES = 1231;

fs.mkdirSync(OUT, { recursive: true });

/**
 * Download a URL with curl and return the response body as a UTF-8 string.
 *
 * curl is invoked through execFileSync with an argument array, so the URL is
 * passed verbatim and never interpreted by a shell.
 *
 * @param {string} url - Absolute URL to fetch.
 * @returns {string} Response body.
 * @throws {Error} When curl cannot be spawned, exits non-zero, or the output
 *   exceeds the 30 MiB buffer.
 */
function curlHtml(url) {
  return execFileSync(
    "curl",
    [
      "-L",
      "-s",
      url,
      "-H",
      "User-Agent: Mozilla/5.0",
      "-H",
      "Accept-Language: th-TH,th;q=0.9",
    ],
    { encoding: "utf8", maxBuffer: 30 * 1024 * 1024 }
  );
}

/**
 * Turn an href/src attribute value into an absolute URL on BASE.
 *
 * @param {string|undefined|null} src - Raw attribute value.
 * @returns {string|null} Absolute URL, or null when `src` is empty/missing.
 */
function absUrl(src) {
  if (!src) return null;
  // Already absolute (scheme match is case-insensitive).
  if (/^https?:\/\//i.test(src)) return src;
  // Protocol-relative ("//host/path"): only the scheme is missing.
  if (src.startsWith("//")) return `${BASE_PROTOCOL}${src}`;
  if (src.startsWith("/")) return `${BASE}${src}`;
  return `${BASE}/${src}`;
}

/**
 * Collapse every run of whitespace into a single space and trim the ends.
 *
 * @param {string} text - Raw text extracted from a table cell.
 * @returns {string} Normalised text.
 */
function cleanText(text) {
  return text.replace(/\s+/g, " ").trim();
}

// TODO: set the path to match the real site (check the browser address bar).
// Hypothetical example:
//   return `${BASE}/public/dispatch/index/menu/XXXX/page/${page}`;
/**
 * Build the listing URL for one page of the configured menu.
 *
 * @param {number} page - 1-based page number.
 * @returns {string} Listing URL for that page.
 */
function buildUrl(page) {
  return `${BASE}/public/dispatch/data/index/menu/${MENU_ID}/page/${page}`;
}

/**
 * Fetch one listing page, extract its rows and write `page-<page>.json`.
 *
 * @param {number} page - 1-based page number.
 * @param {boolean} [saveHtml=false] - Also dump the raw HTML to
 *   `debug-page-<page>.html` for selector debugging.
 * @returns {Array<object>} Items found on the page (possibly empty).
 * @throws {Error} When the download or a file write fails.
 */
function scrapeOnePage(page, saveHtml = false) {
  const url = buildUrl(page);
  const html = curlHtml(url);

  if (saveHtml) {
    fs.writeFileSync(path.join(OUT, `debug-page-${page}.html`), html, "utf8");
  }

  const $ = cheerio.load(html);
  const items = [];

  const rows = $(
    "table.dispatch_table tbody tr.dispatch_odd, table.dispatch_table tbody tr.dispatch_even"
  );
  console.log(`page ${page} rows =`, rows.length);

  rows.each((_, tr) => {
    const tds = $(tr).find("td.dispatch_normal");
    // Rows with fewer than four data cells are skipped.
    if (tds.length < 4) return;

    const date = cleanText($(tds[0]).text());
    const no = cleanText($(tds[1]).text());

    const topicTd = $(tds[2]);
    const a = topicTd.find("a[href]").first();
    // Fall back to the whole cell text when the anchor has no text.
    const title = cleanText(a.text() || topicTd.text());
    const link = absUrl(a.attr("href"));

    // Some rows carry pdf/file icons: collect anchor targets and image
    // sources together, in document order.
    const fileLinks = [];
    topicTd.find("a[href], img[src]").each((_, el) => {
      const tag = el.tagName?.toLowerCase();
      let attrName = null;
      if (tag === "a") attrName = "href";
      else if (tag === "img") attrName = "src";
      if (!attrName) return;

      const value = $(el).attr(attrName);
      if (value) fileLinks.push(absUrl(value));
    });

    const type = cleanText($(tds[3]).text());

    if (!title) return;

    items.push({
      date: date || null,
      no: no || null,
      title,
      type: type || null,
      link: link || null,
      fileLinks: [...new Set(fileLinks)].filter(Boolean),
      sourcePage: page,
      sourceUrl: url,
    });
  });

  const output = {
    source: url,
    scrapedAt: new Date().toISOString(),
    page,
    count: items.length,
    items,
  };

  fs.writeFileSync(
    path.join(OUT, `page-${page}.json`),
    JSON.stringify(output, null, 2),
    "utf8"
  );
  console.log(`✅ page ${page} -> items ${items.length}`);

  return items;
}

// Entry point: walk every page, de-duplicate the items and write `all.json`.
(function main() {
  const all = [];
  const seen = new Set();
  const failedPages = [];

  for (let p = 1; p <= TOTAL_PAGES; p++) {
    let items;
    try {
      // Keep the raw HTML of the first page for debugging.
      items = scrapeOnePage(p, p === 1);
    } catch (err) {
      // A single failed page must not discard everything collected so far.
      console.error(`❌ page ${p} failed:`, err.message);
      failedPages.push(p);
      items = [];
    }

    for (const it of items) {
      const key = `${it.date}|${it.no}|${it.title}|${it.type}|${it.link}`;
      if (seen.has(key)) continue;
      seen.add(key);
      all.push(it);
    }

    // Save effort: an empty first page means the URL or the selectors do not
    // match the site, so stop early and debug instead of walking every page.
    if (p === 1 && items.length === 0) {
      console.log("❌ page 1 = 0: เปิด debug-page-1.html แล้วเช็ค buildUrl(menuId/path)");
      break;
    }
  }

  fs.writeFileSync(
    path.join(OUT, `all.json`),
    JSON.stringify(
      { scrapedAt: new Date().toISOString(), totalItems: all.length, items: all },
      null,
      2
    ),
    "utf8"
  );

  console.log("✅ Total:", all.length);

  if (failedPages.length > 0) {
    console.error(`❌ failed pages (${failedPages.length}):`, failedPages.join(", "));
    // Signal the partial run to the caller without aborting the final write.
    process.exitCode = 1;
  }
})();