migrate-lsw/local-performance-assessment.js

269 lines
7.8 KiB
JavaScript

// หน้าแรก > งานบริการ > การประเมินประสิทธิภาพของ อปท. (LPA)
const { execFileSync, execSync } = require("child_process");
const cheerio = require("cheerio");
const fs = require("fs");
const path = require("path");
const axios = require("axios").default;
// Origin of the site being scraped; relative links and list URLs are built on top of it.
const BASE = "https://ladsawai.go.th";
// Output directory (inside the current working directory) for every JSON/HTML file this script writes.
const OUT = path.join(process.cwd(), "การประเมินประสิทธิภาพของ อปท. (LPA)");
// Create the output directory at load time; `recursive: true` makes this a no-op when it already exists.
fs.mkdirSync(OUT, { recursive: true });
/**
 * Downloads a page with the system `curl` binary and returns the response body.
 *
 * curl is started directly with an argument array (no shell in between), so
 * quotes, `$(...)`, backticks or `;` inside a URL taken from scraped HTML are
 * passed to curl literally instead of being interpreted as shell syntax.
 *
 * @param {string} url - URL to fetch; redirects are followed (`-L`).
 * @returns {string} Response body decoded as UTF-8.
 * @throws {Error} When curl cannot be started, exits with a non-zero status,
 *   or produces more than 30 MiB of output.
 */
function curlHtml(url) {
  const args = [
    "-L",
    "-s",
    String(url),
    "-H",
    "User-Agent: Mozilla/5.0",
    "-H",
    "Accept-Language: th-TH,th;q=0.9",
  ];
  return execFileSync("curl", args, {
    encoding: "utf8",
    maxBuffer: 30 * 1024 * 1024,
  });
}
/**
 * Turns an href found in scraped HTML into an absolute URL.
 *
 * - `http://` / `https://` URLs (any letter case) are returned unchanged.
 * - Protocol-relative links (`//host/path`) get the scheme of `BASE`.
 * - Root-relative (`/path`) and bare relative (`path`) links are appended to `BASE`.
 *
 * Surrounding whitespace is stripped first.
 *
 * @param {string|null|undefined} href - Raw attribute value.
 * @returns {string|null} Absolute URL, or `null` when `href` is missing,
 *   not a string, or blank.
 */
function absUrl(href) {
  const target = typeof href === "string" ? href.trim() : "";
  if (!target) return null;

  // Require the full "scheme://" prefix so a relative path such as
  // "httpdocs/file.pdf" is not mistaken for an absolute URL.
  if (/^https?:\/\//i.test(target)) return target;

  // Protocol-relative link: it points at another host, so only the scheme is borrowed.
  if (target.startsWith("//")) return `${new URL(BASE).protocol}${target}`;

  if (target.startsWith("/")) return BASE + target;
  return `${BASE}/${target}`;
}
/**
 * Builds the URL of one page of a listing.
 *
 * @param {number|string} menuId - Menu identifier placed in the `/menu/` segment.
 * @param {number|string|null|undefined} catid - Category identifier; when
 *   truthy the category listing route is used, otherwise the menu index route.
 * @param {number|string} page - Page number placed in the `/page/` segment.
 * @returns {string} Absolute listing URL under `BASE`.
 */
function buildUrl(menuId, catid, page) {
  const route = catid
    ? `datacategory/catid/${catid}/menu/${menuId}`
    : `index/menu/${menuId}`;
  return `${BASE}/public/list/data/${route}/page/${page}`;
}
/**
 * Guesses the number of pages of a listing from its anchors.
 *
 * Every `<a>` in the document is inspected (not only pagination links); an
 * anchor counts when its trimmed text consists of digits only.
 *
 * @param {Function} $ - Cheerio-style query function for the loaded page.
 * @returns {number} Largest numeric anchor label, or 1 when there is none
 *   larger than 1.
 */
function detectTotalPages($) {
  const pageNumbers = [];
  $("a").each((_, anchor) => {
    const label = $(anchor).text().trim();
    if (/^\d+$/.test(label)) {
      pageNumbers.push(Number(label));
    }
  });
  return pageNumbers.reduce((highest, current) => Math.max(highest, current), 1);
}
// Splits an attachment label of the form "<title> ดาวน์โหลดแล้ว <n> ครั้ง" into its title and download count.
function parseDownloadLabel(label) {
  const fallback = { title: label, downloadCount: 0 };
  if (!label) return fallback;

  const parts = label.split(" ดาวน์โหลดแล้ว ");
  if (parts.length < 2) return fallback;

  // Explicit radix, and never let NaN leak out (JSON.stringify would turn it into null).
  const count = Number.parseInt(parts[1].replace("ครั้ง", "").trim(), 10);
  return { title: parts[0], downloadCount: Number.isNaN(count) ? 0 : count };
}

/**
 * Fetches a detail page and collects the downloadable files it links to.
 *
 * Two passes are made over the page:
 *  1. `a.uploadconfig_link` anchors: the URL comes from `data-href` (preferred)
 *     or `href`; the anchor text is split into a title and a download count.
 *  2. Fallback: any other `a[href]` whose URL ends in a known document/archive
 *     extension (optionally followed by `?query` or `#fragment`) and was not
 *     collected already.
 *
 * @param {string} detailUrl - Absolute URL of the detail page.
 * @returns {Array<{text: (string|null), url: string, downloadCount: number}>}
 *   Files in document order; `downloadCount` is 0 when the page shows no
 *   parseable counter for that link.
 * @throws {Error} Whatever `curlHtml` throws when the download fails.
 */
function extractFileLinksFromDetail(detailUrl) {
  const fileExtension = /\.(pdf|doc|docx|xls|xlsx|ppt|pptx|zip|rar)(\?|#|$)/i;
  const $ = cheerio.load(curlHtml(detailUrl));
  const files = [];
  // URLs already collected, so the fallback pass can skip them in O(1).
  const seenUrls = new Set();

  $("a.uploadconfig_link").each((_, a) => {
    const el = $(a);
    const fileUrl = absUrl(el.attr("data-href") || el.attr("href"));
    if (!fileUrl) return;

    const label = el.text().replace(/\s+/g, " ").trim() || null;
    const { title, downloadCount } = parseDownloadLabel(label);

    files.push({ text: title, url: fileUrl, downloadCount });
    seenUrls.add(fileUrl);
  });

  // Fallback: plain links that point straight at a file.
  $("a[href]").each((_, a) => {
    const el = $(a);
    const url = absUrl(el.attr("href"));
    if (!url || seenUrls.has(url) || !fileExtension.test(url)) return;

    seenUrls.add(url);
    // Same record shape as the first pass; these links carry no counter.
    files.push({ text: el.text().trim() || null, url, downloadCount: 0 });
  });

  return files;
}
/**
 * Asks the site's `<fileUrl>/status/1/` endpoint for the real path of a file.
 *
 * This is a best-effort lookup: any failure (bad input, network error,
 * timeout, non-2xx response) is logged as a warning and reported as `null`
 * instead of being thrown, so one bad file does not abort a whole page.
 *
 * @param {string} fileUrl - Absolute URL of the file link found on a detail page.
 * @returns {Promise<string|null>} The `path` field of the response body, or
 *   `null` when the request fails or the response carries no truthy `path`.
 */
async function resolveRealFilePath(fileUrl) {
  let statusUrl = fileUrl;
  try {
    // Strip one trailing slash so the result never contains "//status".
    statusUrl = `${fileUrl.replace(/\/$/, "")}/status/1/`;
    const res = await axios.get(statusUrl, { timeout: 30000 });
    return res?.data?.path || null;
  } catch (error) {
    console.warn(
      `resolveRealFilePath: lookup failed for ${statusUrl}: ${error?.message ?? error}`
    );
    return null;
  }
}
/**
 * Maps `arr` through an async `mapper` with at most `limit` calls in flight,
 * to avoid hammering the remote server.
 *
 * A fixed pool of workers pulls the next unprocessed index until the array is
 * exhausted; results are stored by index, so output order matches input order.
 *
 * @param {Array} arr - Items to process.
 * @param {number} limit - Maximum number of concurrent mapper calls. Values
 *   below 1 (or NaN/undefined) are treated as 1, so the input is still
 *   processed instead of silently yielding an empty result.
 * @param {function(*, number): Promise<*>} mapper - Called with `(item, index)`.
 * @returns {Promise<Array>} Mapper results, index-aligned with `arr`.
 * @throws Rejects with the first mapper rejection.
 */
async function mapLimit(arr, limit, mapper) {
  const results = new Array(arr.length);
  const workerCount = Math.min(limit >= 1 ? limit : 1, arr.length);
  let next = 0;

  async function worker() {
    // No await sits between the bounds check and the increment, so two
    // workers can never claim the same index.
    while (next < arr.length) {
      const idx = next++;
      results[idx] = await mapper(arr[idx], idx);
    }
  }

  await Promise.all(Array.from({ length: workerCount }, () => worker()));
  return results;
}
/**
 * Scrapes one listing page, writes its result as JSON into `OUT`, and returns it.
 *
 * Behaviour depends on `catid`:
 *  - With a `catid`, each row is treated as a document: its detail page is
 *    fetched, the first file link is taken, and the real file path is looked
 *    up via `resolveRealFilePath`.
 *  - Without a `catid`, each row is treated as a category: the category id is
 *    read from the row's link and page 1 of that category is scraped
 *    recursively. Rows whose link has no `/catid/<n>/menu/<n>` segment (or
 *    whose category scrape fails) fall back to a flat record with no file data.
 *
 * Rows are processed through `mapLimit` with a limit of 5. The detail-page
 * download itself is called synchronously, so only the awaited steps overlap.
 *
 * @param {number|string} menuId - Menu id of the listing.
 * @param {number|string|null|undefined} catid - Category id, or a falsy value
 *   for the top-level menu listing.
 * @param {number|string} page - Page number to scrape.
 * @param {boolean} [saveHtml=false] - Also dump the raw HTML next to the JSON
 *   (passed on to recursive category scrapes).
 * @returns {Promise<{$: Function, items: (Array<object>|object)}>} The loaded
 *   cheerio instance plus `items`: the array of row records when `catid` is
 *   falsy, otherwise the whole page output object
 *   (`{source, scrapedAt, menuId, catid, page, count, items}`).
 * @throws {Error} When the listing page cannot be downloaded or a file cannot
 *   be written.
 */
async function scrapeOnePage(menuId, catid, page, saveHtml = false) {
  const url = buildUrl(menuId, catid, page);
  const html = curlHtml(url);
  const fileStem = `menu-${menuId}${catid ? `-catid-${catid}` : ""}-page-${page}`;

  if (saveHtml) {
    fs.writeFileSync(path.join(OUT, `debug-${fileStem}.html`), html, "utf8");
  }

  const $ = cheerio.load(html);
  // Materialise the rows as a plain array so they can go through mapLimit.
  const rows = $(".row.data-row").toArray();

  const scraped = await mapLimit(rows, 5, async (row) => {
    const link = $(row).find("a.listdataconfig_link[href]").first();
    if (!link.length) return null;

    const title =
      link.find("label.font-weight").text().replace(/\s+/g, " ").trim() ||
      link.text().replace(/\s+/g, " ").trim();
    if (!title) return null;

    const detailUrl = absUrl(link.attr("href"));

    if (!catid) {
      // Top-level listing: the row is a category, so scrape its first page.
      // Link shape: /public/list/data/detail/catid/{catid}/menu/{menu}/...
      const rowCatid = detailUrl?.match(/\/catid\/(\d+)\/menu\/(\d+)/)?.[1];
      if (rowCatid) {
        try {
          const category = await scrapeOnePage(menuId, rowCatid, 1, saveHtml);
          return {
            title,
            detailUrl: detailUrl || null,
            items: category.items,
            sourcePage: page,
            sourceUrl: url,
          };
        } catch (error) {
          // Best effort: keep the row as a flat record below.
          console.error('error :', error);
        }
      }
    }

    let file = null;
    let realPath = null;
    if (catid && detailUrl) {
      // Category listing: the row is a document, so go straight to its detail page.
      try {
        file = extractFileLinksFromDetail(detailUrl)?.[0] ?? null;
        if (file?.url) {
          realPath = await resolveRealFilePath(absUrl(file.url));
        }
      } catch (error) {
        // Best effort: one broken detail page must not abort the whole listing.
        console.warn(
          `scrapeOnePage: could not read files from ${detailUrl}: ${error?.message ?? error}`
        );
        file = null;
        realPath = null;
      }
    }

    return {
      title,
      detailUrl: detailUrl || null,
      fileName: file?.text || null,
      fileUrl: file?.url ? absUrl(file.url) : null, // link as found on the detail page
      // Real location reported by the /status/1/ API; null (not ".../public/null")
      // when nothing was resolved.
      filePath: realPath ? `${BASE}/public/${realPath}` : null,
      // A count of 0 is indistinguishable from "no counter shown", so it is reported as null.
      downloadCount: file?.downloadCount || null,
      sourcePage: page,
      sourceUrl: url,
    };
  });

  // Drop rows that had no usable link or title.
  const items = scraped.filter(Boolean);

  const output = {
    source: url,
    scrapedAt: new Date().toISOString(),
    menuId,
    catid,
    page,
    count: items.length,
    items,
  };

  fs.writeFileSync(
    path.join(OUT, `${fileStem}.json`),
    JSON.stringify(output, null, 2),
    "utf8"
  );
  console.log(`✅ page ${page} -> items ${items.length}`);

  return { $, items: catid ? output : items };
}
/**
 * Entry point: scrapes every page of the hard-coded menu, de-duplicates the
 * rows across pages, and writes the merged result to `menu-<menuId>-all.json`
 * in `OUT`.
 *
 * Page 1 is scraped first (with the HTML debug dump enabled) because its
 * anchors are what `detectTotalPages` uses to decide how many pages to fetch.
 * Any failure is logged and turns into a non-zero exit code.
 */
(async function main() {
  const menuId = 1265;

  const first = await scrapeOnePage(menuId, undefined, 1, true);
  const totalPages = detectTotalPages(first.$);
  console.log("✅ totalPages =", totalPages);

  const all = [];
  const seen = new Set();

  // Appends rows not seen before; identity is title + detail URL + file path.
  const addItems = (items) => {
    for (const it of items) {
      const key = `${it.title}|${it.detailUrl || ""}|${it.filePath || ""}`;
      if (seen.has(key)) continue;
      seen.add(key);
      all.push(it);
    }
  };

  addItems(first.items);
  // Remaining pages are fetched one after another.
  for (let p = 2; p <= totalPages; p++) {
    const { items } = await scrapeOnePage(menuId, undefined, p, false);
    addItems(items);
  }

  const merged = {
    menuId,
    totalPages,
    scrapedAt: new Date().toISOString(),
    totalItems: all.length,
    items: all,
  };

  const outAll = path.join(OUT, `menu-${menuId}-all.json`);
  fs.writeFileSync(outAll, JSON.stringify(merged, null, 2), "utf8");
  console.log("🎉 Saved all:", outAll);
  console.log("🎉 Total unique:", all.length);
})().catch((error) => {
  // Never leave the top-level promise floating: report and fail the process.
  console.error("❌ scrape failed:", error);
  process.exitCode = 1;
});