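// Scrape the procurement-announcements listing ("ประกาศจัดซื้อจัดจ้าง") from
// ladsawai.go.th. The rows are rendered client-side, so each page is loaded
// in headless Chromium via Playwright, then the final HTML is parsed with
// cheerio. Deps: npm install cheerio playwright && npx playwright install chromium
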
const fs = require("fs");
const path = require("path");
const cheerio = require("cheerio");
const { chromium } = require("playwright");

const BASE = "https://ladsawai.go.th";
const OUT = path.join(process.cwd(), "ประกาศจัดซื้อจัดจ้าง");
fs.mkdirSync(OUT, { recursive: true });

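// Turn a scraped href (absolute, root-relative, or relative) into an
// absolute URL on BASE.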
function absUrl(src) {
  if (!src) return null;
  if (src.startsWith("http")) return src;
  if (src.startsWith("/")) return BASE + src;
  return BASE + "/" + src;
}

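// Render one listing page in headless Chromium (a fresh browser per page),
// wait for the client-side rows to appear, and return them as plain objects.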
async function scrapePage(menuId, page) {
  const url = `${BASE}/public/list/data/index/menu/${menuId}/page/${page}`;

  const browser = await chromium.launch({ headless: true });
  const pageObj = await browser.newPage();

  await pageObj.goto(url, { waitUntil: "networkidle", timeout: 60000 });

  // Wait until client-side JS has actually rendered the data rows
  await pageObj.waitForSelector(".row.data-row", { timeout: 30000 });

  const html = await pageObj.content();
  await browser.close();

  // Debug: keep a copy of the rendered HTML
  fs.writeFileSync(
    path.join(OUT, `debug-rendered-page-${page}.html`),
    html,
    "utf8"
  );

  const $ = cheerio.load(html);
  const items = [];

  $(".row.data-row").each((_, row) => {
    const el = $(row);

    const left = el.find(".col-12.col-sm-10").first();

    // 👉 The title text is present now that the page has been rendered:
    // clone the cell, strip links/images, and keep the remaining text.
    const title = left
      .clone()
      .find("a,img")
      .remove()
      .end()
      .text()
      .replace(/\s+/g, " ")
      .trim();

    const a = left.find("a[href]").first();
    const link = absUrl(a.attr("href"));

    const date = el
      .find(".col-12.col-sm-2")
      .text()
      .replace(/\s+/g, " ")
      .trim();

    if (!title) return;

    items.push({
      title,
      date,
      link,
      sourcePage: page,
      sourceUrl: url,
    });
  });

  return items;
}
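
// Crawl all listing pages for one menu, de-duplicate rows across pages,
// and write a single combined JSON file into OUT.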
(async () => {
  const menuId = 1236;
  const totalPages = 12;

  const all = [];
  const seen = new Set();

  for (let p = 1; p <= totalPages; p++) {
    const items = await scrapePage(menuId, p);
    console.log(`✅ page ${p} -> ${items.length} items`);

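    // De-duplicate across pages by title + date + link.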
    for (const it of items) {
      const key = `${it.title}|${it.date}|${it.link}`;
      if (seen.has(key)) continue;
      seen.add(key);
      all.push(it);
    }
  }

  const out = {
    menuId,
    totalItems: all.length,
    scrapedAt: new Date().toISOString(),
    items: all,
  };

  fs.writeFileSync(
    path.join(OUT, `menu-${menuId}-all.json`),
    JSON.stringify(out, null, 2),
    "utf8"
  );

  console.log("🎉 DONE:", all.length);
})();