// Scraper for the government procurement announcements (e-GP) listing on
// ladsawai.go.th. Each listing page is downloaded with curl, the announcement
// table is parsed with cheerio, one JSON file is written per page, and a
// merged, de-duplicated JSON file is written at the end.
const { execFileSync } = require("child_process");
const cheerio = require("cheerio");
const fs = require("fs");
const path = require("path");

const BASE = "https://ladsawai.go.th";

// Output directory, created under the current working directory.
const OUT = path.join(process.cwd(), "ประกาศจัดซื้อจัดจ้างภาครัฐ (egp)");
fs.mkdirSync(OUT, { recursive: true });

/**
 * Download a URL with the system `curl` binary and return the body as text.
 *
 * curl is invoked through execFileSync with an argument array, so the URL is
 * never interpreted by a shell (quotes, `$`, backticks, etc. are passed as-is).
 *
 * @param {string} url - URL to fetch (redirects are followed via `-L`).
 * @returns {string} Response body decoded as UTF-8.
 * @throws {Error} If curl cannot be started, exits non-zero, or the output
 *   exceeds the 20 MiB buffer.
 */
function curlHtml(url) {
  return execFileSync(
    "curl",
    [
      "-L",
      "-s",
      url,
      "-H",
      "User-Agent: Mozilla/5.0",
      "-H",
      "Accept-Language: th-TH,th;q=0.9",
    ],
    { encoding: "utf8", maxBuffer: 20 * 1024 * 1024 }
  );
}

/**
 * Turn an href/src taken from the page into an absolute URL.
 *
 * @param {string|undefined|null} src - Raw attribute value.
 * @returns {string|null} `src` unchanged when it is already an http(s) URL;
 *   otherwise `src` resolved against BASE; `null` when `src` is empty, cannot
 *   be resolved, or resolves to a non-http(s) scheme (e.g. `javascript:`).
 */
function absUrl(src) {
  if (!src) return null;
  if (/^https?:\/\//i.test(src)) return src;

  try {
    // URL resolution handles "/path", "path", "../path" and "//host/path",
    // which plain string concatenation with BASE does not.
    const resolved = new URL(src, BASE);
    const isHttp =
      resolved.protocol === "http:" || resolved.protocol === "https:";
    return isHttp ? resolved.href : null;
  } catch (_err) {
    return null;
  }
}

/**
 * Collapse every run of whitespace to a single space and trim both ends.
 *
 * @param {string} text - Raw text extracted from an element.
 * @returns {string} Normalized text.
 */
function normalizeText(text) {
  return text.replace(/\s+/g, " ").trim();
}

/**
 * Scrape one listing page and write its items to
 * `egp-menu-<menuId>-page-<page>.json` inside OUT.
 *
 * @param {number} menuId - Menu id used in the listing URL.
 * @param {number} page - Page number used in the listing URL.
 * @param {boolean} [saveHtml=false] - Also save the raw HTML of the page.
 * @returns {Array<object>} Items found on the page. Each item has `title`,
 *   `date`, `category`, `link`, `sourcePage` and `sourceUrl`.
 * @throws {Error} If the download or any file write fails.
 */
function scrapeOnePage(menuId, page, saveHtml = false) {
  const url = `${BASE}/public/rss/egp/listegp/menu/${menuId}/page/${page}`;
  const html = curlHtml(url);

  if (saveHtml) {
    fs.writeFileSync(
      path.join(OUT, `page-egp-menu-${menuId}-page-${page}.html`),
      html,
      "utf8"
    );
  }

  const $ = cheerio.load(html);
  const items = [];

  // Listing table: column 0 = date, column 1 = category, column 2 = title link.
  $("table tbody tr").each((_, tr) => {
    const tds = $(tr).find("td");
    if (tds.length < 3) return;

    const date = normalizeText($(tds[0]).text());
    const category = normalizeText($(tds[1]).text());
    const a = $(tds[2]).find("a").first();
    const title = normalizeText(a.text());
    const link = absUrl(a.attr("href")); // detail page, when present

    // Rows without a title carry no announcement; skip them.
    if (!title) return;

    items.push({
      title,
      date: date || null,
      category: category || null,
      link: link || null,
      sourcePage: page,
      sourceUrl: url,
    });
  });

  const output = {
    source: url,
    scrapedAt: new Date().toISOString(),
    menuId,
    page,
    count: items.length,
    items,
  };

  const outJson = path.join(OUT, `egp-menu-${menuId}-page-${page}.json`);
  fs.writeFileSync(outJson, JSON.stringify(output, null, 2), "utf8");
  console.log(`✅ EGP page ${page} -> items ${items.length}`);

  return items;
}

// Entry point: scrape every page, merge the items, drop duplicates and write
// `list-menu-<menuId>-all.json`.
(function main() {
  const menuId = 1564; // government procurement announcements (e-GP) menu
  const totalPages = 240;

  const all = [];
  const seen = new Set();
  const failedPages = [];

  // Set to true to also keep the raw HTML of every page (one file per page).
  const saveHtml = false;

  for (let page = 1; page <= totalPages; page++) {
    let items;
    try {
      items = scrapeOnePage(menuId, page, saveHtml);
    } catch (err) {
      // One bad page must not discard the pages already collected: report it,
      // remember it, and keep going so the merged file is still written.
      console.error(`EGP page ${page} failed: ${err.message}`);
      failedPages.push(page);
      continue;
    }

    // Merge and de-duplicate. The key uses the detail link (items have no
    // `image` field), so two different announcements that share a title and
    // a date are both kept.
    for (const it of items) {
      const key = `${it.title}|${it.date || ""}|${it.link || ""}`;
      if (seen.has(key)) continue;
      seen.add(key);
      all.push(it);
    }
  }

  const merged = {
    menuId,
    totalPages,
    scrapedAt: new Date().toISOString(),
    totalItems: all.length,
    failedPages,
    items: all,
  };

  const outAll = path.join(OUT, `list-menu-${menuId}-all.json`);
  fs.writeFileSync(outAll, JSON.stringify(merged, null, 2), "utf8");

  console.log("✅ Saved merged JSON:", outAll);
  console.log("✅ Total unique items:", all.length);

  if (failedPages.length > 0) {
    console.error("Pages that could not be scraped:", failedPages.join(", "));
    // Keep signalling failure to the caller, as an uncaught error did before.
    process.exitCode = 1;
  }
})();