// Scraper for the procurement announcements (ประกาศจัดซื้อจัดจ้าง) list on ladsawai.go.th.
|
|
|
|
|
|
const { execFileSync, execSync } = require("child_process");
const fs = require("fs");
const path = require("path");

const cheerio = require("cheerio");
|
|
|
|
// Origin of the site being scraped; list URLs and relative links are built on it.
const BASE = "https://ladsawai.go.th";

// Output folder, resolved against the directory the script is launched from.
const OUT = path.resolve("ประกาศจัดซื้อจัดจ้าง");

// Create the output folder up front; `recursive` makes this a no-op if it exists.
fs.mkdirSync(OUT, { recursive: true });
|
|
|
|
/**
 * Downloads a URL with the system `curl` binary and returns the response body.
 *
 * curl is started directly with an argument array instead of through a shell
 * command string, so characters such as `"`, `$` or backticks inside the URL
 * are passed to curl verbatim and can never be interpreted by a shell.
 *
 * @param {string} url - Address to fetch.
 * @returns {string} Response body decoded as UTF-8.
 * @throws {Error} When curl cannot be started, exits with a non-zero status,
 *   or produces more output than the 20 MiB buffer allows.
 */
function curlHtml(url) {
  const args = [
    "-L", // follow redirects
    "-s", // no progress meter
    "-H",
    "User-Agent: Mozilla/5.0",
    "-H",
    "Accept-Language: th-TH,th;q=0.9",
    // Passing the address through --url keeps a value that starts with "-"
    // from being parsed as a curl option.
    "--url",
    url,
  ];

  return execFileSync("curl", args, {
    encoding: "utf8",
    maxBuffer: 20 * 1024 * 1024,
  });
}
|
|
|
|
/**
 * Turns an `href`/`src` value taken from the scraped HTML into an absolute URL.
 *
 * @param {string|null|undefined} src - Raw attribute value.
 * @returns {string|null} Absolute URL, or null when `src` is empty or missing.
 */
function absUrl(src) {
  if (!src) return null;

  // Anything with an explicit scheme (http:, https:, data:, mailto:, ...) is
  // already absolute. Matching "<scheme>:" instead of a bare "http" prefix
  // keeps relative paths such as "httpdocs/a.png" from being passed through.
  if (/^[a-z][a-z0-9+.-]*:/i.test(src)) return src;

  // Protocol-relative ("//host/path"): borrow the scheme of BASE rather than
  // gluing the whole value onto BASE as if it were a path.
  if (src.startsWith("//")) return new URL(BASE).protocol + src;

  // Root-relative path.
  if (src.startsWith("/")) return BASE + src;

  // Bare relative path such as "public/...".
  return `${BASE}/${src}`;
}
|
|
|
|
/**
 * Fetches one page of a menu's announcement list, extracts its rows, and
 * writes them to `list-menu-<menuId>-page-<page>.json` inside OUT.
 *
 * @param {number|string} menuId - Menu identifier interpolated into the list URL.
 * @param {number|string} page - Page number interpolated into the list URL.
 * @param {boolean} [saveHtml=false] - When true, the raw HTML is also written
 *   to `page-menu-<menuId>-page-<page>.html` inside OUT.
 * @returns {Array<object>} Items parsed from the page; rows whose link text is
 *   empty are skipped.
 */
function scrapeOnePage(menuId, page, saveHtml = false) {
  const url = `${BASE}/public/list/data/index/menu/${menuId}/page/${page}`;
  const html = curlHtml(url);

  if (saveHtml) {
    const htmlFile = path.join(OUT, `page-menu-${menuId}-page-${page}.html`);
    fs.writeFileSync(htmlFile, html, "utf8");
  }

  const $ = cheerio.load(html);

  // Collapse every run of whitespace to one space and strip both ends.
  const squash = (text) => text.replace(/\s+/g, " ").trim();

  const items = [];
  for (const row of $(".row.data-row").toArray()) {
    const rowEl = $(row);
    const leftCol = rowEl.find(".col-12.col-sm-10").first();
    const anchor = leftCol.find("a.listdataconfig_link").first();

    // The title is the text of the row's link; a row without one is not an item.
    const title = squash(anchor.text());
    if (!title) continue;

    const link = absUrl(anchor.attr("href"));
    const date = squash(rowEl.find(".col-12.col-sm-2 #show-right-date").text());

    // Icons are optional: keep only images that actually carry a src.
    const icons = leftCol
      .find("img")
      .toArray()
      .map((img) => $(img).attr("src"))
      .filter(Boolean)
      .map((src) => absUrl(src));

    items.push({
      title,
      date: date || null,
      link: link || null,
      icons,
      sourcePage: page,
      sourceUrl: url,
    });
  }

  const report = {
    source: url,
    scrapedAt: new Date().toISOString(),
    menuId,
    page,
    count: items.length,
    items,
  };

  const jsonFile = path.join(OUT, `list-menu-${menuId}-page-${page}.json`);
  fs.writeFileSync(jsonFile, JSON.stringify(report, null, 2), "utf8");

  console.log(`✅ page ${page} -> items ${items.length}`);
  return items;
}
|
|
|
|
/**
 * Entry point: scrapes every list page of the procurement menu, merges the
 * items while dropping duplicates, and writes the merged result to
 * `list-menu-<menuId>-all.json` inside OUT.
 */
(function main() {
  const menuId = 1236; // procurement announcements menu
  const totalPages = 12;

  // Set to true to also keep the raw HTML of each page (one file per page).
  const saveHtml = false;

  const all = [];
  const seen = new Set();

  for (let page = 1; page <= totalPages; page++) {
    const items = scrapeOnePage(menuId, page, saveHtml);

    // Merge and de-duplicate. Items are identified by title, date and link.
    // The key previously read `it.image`, a field the items never have, so
    // distinct announcements sharing a title and date were silently dropped.
    // JSON-encoding the parts keeps them unambiguous even if a title contains
    // the old "|" separator.
    for (const it of items) {
      const key = JSON.stringify([it.title, it.date, it.link]);
      if (seen.has(key)) continue;
      seen.add(key);
      all.push(it);
    }
  }

  const merged = {
    menuId,
    totalPages,
    scrapedAt: new Date().toISOString(),
    totalItems: all.length,
    items: all,
  };

  const outAll = path.join(OUT, `list-menu-${menuId}-all.json`);
  fs.writeFileSync(outAll, JSON.stringify(merged, null, 2), "utf8");

  console.log("✅ Saved merged JSON:", outAll);
  console.log("✅ Total unique items:", all.length);
})();
|