// Government procurement announcements (e-GP) scraper.
//
// Downloads every listing page of one e-GP menu from BASE with curl, extracts the
// rows of the listing table with cheerio, writes one JSON file per page into OUT,
// and finally writes a merged, de-duplicated JSON file covering all pages.
const { execFileSync } = require("child_process");
const cheerio = require("cheerio");
const fs = require("fs");
const path = require("path");

const BASE = "https://ladsawai.go.th";
const OUT = path.join(process.cwd(), "ประกาศจัดซื้อจัดจ้างภาครัฐ (egp)");
fs.mkdirSync(OUT, { recursive: true });

/**
 * Fetch a URL with the system `curl` binary and return the response body.
 *
 * curl is invoked without a shell (argument array), so the URL is never
 * interpreted by a command interpreter.
 *
 * @param {string} url - Absolute URL to download.
 * @returns {string} Response body decoded as UTF-8.
 * @throws {Error} When curl exits non-zero (network error, or HTTP >= 400
 *   because of `--fail`) or the output exceeds the 30 MiB buffer.
 */
function curlHtml(url) {
  return execFileSync(
    "curl",
    [
      "-L", // follow redirects
      "-s", // no progress meter
      "--show-error", // ...but still print the reason when a transfer fails
      "--fail", // treat HTTP error statuses as failures instead of returning the error page
      url,
      "-H",
      "User-Agent: Mozilla/5.0",
      "-H",
      "Accept-Language: th-TH,th;q=0.9",
    ],
    { encoding: "utf8", maxBuffer: 30 * 1024 * 1024 }
  );
}

/**
 * Turn an href taken from the page into an absolute URL.
 *
 * @param {string|undefined|null} src - Raw href attribute value.
 * @returns {string|null} Absolute URL, or null when there is no usable href.
 */
function absUrl(src) {
  if (!src) return null;
  const href = src.trim();
  if (!href) return null;
  // Only a real http(s) scheme counts as absolute; a relative path that merely
  // begins with the letters "http" must still be resolved against BASE.
  if (/^https?:\/\//i.test(href)) return href;
  // Protocol-relative URL: reuse the scheme BASE is served over.
  if (href.startsWith("//")) return `https:${href}`;
  if (href.startsWith("/")) return BASE + href;
  // Guard against slash-less relative paths such as "public/...".
  return `${BASE}/${href}`;
}

/**
 * Collapse every run of whitespace to a single space and trim the ends.
 *
 * @param {string} text - Raw text extracted from a DOM node.
 * @returns {string} Normalised text.
 */
function cleanText(text) {
  return text.replace(/\s+/g, " ").trim();
}

/**
 * Scrape one listing page, write its per-page JSON file and return its items.
 *
 * @param {number} menuId - Menu id used in the listing URL.
 * @param {number} page - 1-based page number.
 * @param {boolean} [saveHtml=false] - Also write the raw HTML next to the JSON.
 * @returns {Array<{title: string, date: string|null, category: string|null,
 *   link: string|null, sourcePage: number, sourceUrl: string}>} Items found on the page.
 * @throws {Error} When the download or a file write fails.
 */
function scrapeOnePage(menuId, page, saveHtml = false) {
  const url = `${BASE}/public/rss/egp/listegp/menu/${menuId}/page/${page}`;
  const html = curlHtml(url);

  if (saveHtml) {
    fs.writeFileSync(
      path.join(OUT, `page-egp-menu-${menuId}-page-${page}.html`),
      html,
      "utf8"
    );
  }

  const $ = cheerio.load(html);
  const items = [];

  // Listing table: column 0 = date, column 1 = category, column 2 = linked title.
  $("table tbody tr").each((_, tr) => {
    const tds = $(tr).find("td");
    if (tds.length < 3) return;

    const date = cleanText($(tds[0]).text());
    const category = cleanText($(tds[1]).text());
    const a = $(tds[2]).find("a[href]").first();
    const title = cleanText(a.text());
    const link = absUrl(a.attr("href"));

    // Rows without a linked title carry no announcement; skip them.
    if (!title) return;

    items.push({
      title,
      date: date || null,
      category: category || null,
      link: link || null,
      sourcePage: page,
      sourceUrl: url,
    });
  });

  const output = {
    source: url,
    scrapedAt: new Date().toISOString(),
    menuId,
    page,
    count: items.length,
    items,
  };

  const outJson = path.join(OUT, `egp-menu-${menuId}-page-${page}.json`);
  fs.writeFileSync(outJson, JSON.stringify(output, null, 2), "utf8");

  console.log(`✅ EGP page ${page} -> items ${items.length}`);
  return items;
}

// Entry point: scrape every page, de-duplicate, and write the merged file.
(function main() {
  const menuId = 1564;
  const totalPages = 240;
  const saveHtml = false;

  const all = [];
  const seen = new Set();
  const failedPages = [];

  for (let page = 1; page <= totalPages; page++) {
    let items;
    try {
      items = scrapeOnePage(menuId, page, saveHtml);
    } catch (err) {
      // One bad page must not discard the pages already collected: record it,
      // report it, and carry on with the next page.
      failedPages.push(page);
      console.error(`EGP page ${page} failed: ${err.message}`);
      continue;
    }

    for (const it of items) {
      // EGP rows have no image, so identity is title + date + category + link.
      const key = `${it.title}|${it.date || ""}|${it.category || ""}|${it.link || ""}`;
      if (seen.has(key)) continue;
      seen.add(key);
      all.push(it);
    }
  }

  const merged = {
    menuId,
    totalPages,
    scrapedAt: new Date().toISOString(),
    totalItems: all.length,
    failedPages,
    items: all,
  };

  // Merged file containing every unique item from all pages.
  const outAll = path.join(OUT, `egp-menu-${menuId}-all.json`);
  fs.writeFileSync(outAll, JSON.stringify(merged, null, 2), "utf8");

  console.log("✅ Saved merged JSON:", outAll);
  console.log("✅ Total unique items:", all.length);

  if (failedPages.length > 0) {
    console.error(`EGP pages that could not be scraped: ${failedPages.join(", ")}`);
    // Signal a partial result to whatever launched the script.
    process.exitCode = 1;
  }
})();