Introduction
Puppeteer is a Node.js library that provides a high-level API to control Chrome/Chromium. This guide covers deploying Puppeteer scripts on Shard Cloud.
Creating Your Project
Ensure you have Node.js and npm installed.
Installing Dependencies
npm init -y
npm install puppeteer
Basic Puppeteer Script
Create an index.js file:
const puppeteer = require ( 'puppeteer' );
/**
 * Opens https://example.com in headless Chrome, logs the page title and
 * saves a screenshot to screenshot.png.
 *
 * Any error raised after the browser has launched is logged rather than
 * rethrown, and the browser is always closed before the function returns.
 *
 * @returns {Promise<void>} Resolves once the browser has been closed.
 *   Rejects if the browser fails to launch (or fails to close).
 */
async function run() {
  const browser = await puppeteer.launch({
    headless: 'new',
    args: [
      '--no-sandbox',
      '--disable-setuid-sandbox',
      '--disable-dev-shm-usage',
      '--disable-gpu',
    ],
  });

  try {
    // Page setup lives inside the try block so that a failure here still
    // reaches `finally` and does not leave a Chrome process running.
    const page = await browser.newPage();
    await page.setViewport({ width: 1920, height: 1080 });

    // Navigate to a page
    await page.goto('https://example.com');
    console.log('Page title:', await page.title());

    // Take a screenshot
    await page.screenshot({ path: 'screenshot.png' });
    console.log('Screenshot saved!');
  } catch (error) {
    console.error('Error:', error);
  } finally {
    await browser.close();
  }
}
// run() can still reject (for example when Chrome fails to launch), so handle
// the rejection explicitly and signal failure through the exit code.
run().catch((error) => {
  console.error('Fatal error:', error);
  process.exitCode = 1;
});
The --no-sandbox and --disable-setuid-sandbox flags are required for Puppeteer to work in containerized environments. Because these flags disable Chrome's sandbox, only load pages you trust.
Package.json Configuration
{
"name" : "puppeteer-app" ,
"version" : "1.0.0" ,
"main" : "index.js" ,
"scripts" : {
"start" : "node index.js"
},
"dependencies" : {
"puppeteer" : "^21.0.0"
}
}
Shard Cloud Configuration
Create a .shardcloud file:
DISPLAY_NAME = Puppeteer Bot
DESCRIPTION = Browser Automation with Puppeteer
MAIN = index.js
MEMORY = 1024
VERSION = recommended
Puppeteer applications need at least 512 MB of memory because of the browser overhead; 1024 MB is recommended.
Continuous Screenshot Example
const puppeteer = require ( 'puppeteer' );
/**
 * Launches headless Chrome, opens https://example.com and overwrites
 * latest.png with a fresh screenshot every 60 seconds.
 *
 * Errors from an individual screenshot are logged and do not stop the timer.
 * If page setup or the initial navigation fails, the browser is closed and
 * the error is rethrown.
 *
 * @returns {Promise<() => Promise<void>>} Resolves, once the page has loaded
 *   and the timer is armed, to a `stop` function that cancels the timer and
 *   closes the browser. Callers that ignore the return value keep the
 *   run-until-the-process-exits behaviour.
 */
async function continuousScreenshots() {
  const browser = await puppeteer.launch({
    headless: 'new',
    args: [
      '--no-sandbox',
      '--disable-setuid-sandbox',
      '--disable-dev-shm-usage',
    ],
  });

  let page;
  try {
    page = await browser.newPage();
    await page.setViewport({ width: 1920, height: 1080 });
    await page.goto('https://example.com');
  } catch (error) {
    // Setup failed, so no timer will ever use this browser: close it rather
    // than leaving a Chrome process behind, then surface the original error.
    await browser.close();
    throw error;
  }

  // Take screenshots every minute
  const timer = setInterval(async () => {
    try {
      await page.screenshot({ path: 'latest.png' });
      console.log('Screenshot updated at', new Date().toISOString());
    } catch (error) {
      console.error('Screenshot error:', error);
    }
  }, 60000);

  console.log('Puppeteer running...');

  return async function stop() {
    clearInterval(timer);
    await browser.close();
  };
}
// continuousScreenshots() rejects if Chrome cannot be launched or the initial
// page load fails; report that and signal failure through the exit code.
continuousScreenshots().catch((error) => {
  console.error('Fatal error:', error);
  process.exitCode = 1;
});
Web Scraping Example
const puppeteer = require ( 'puppeteer' );
/**
 * Loads `url` in headless Chrome and extracts the document title, the text of
 * the first <h1> (if any) and the href of up to ten links.
 *
 * @param {string} url - Address of the page to scrape.
 * @returns {Promise<{title: string, heading: (string|undefined), links: string[]}|undefined>}
 *   The scraped data, or `undefined` when scraping failed (the error is
 *   logged, not rethrown). Rejects only if the browser fails to launch or
 *   to close.
 */
async function scrape(url) {
  const browser = await puppeteer.launch({
    headless: 'new',
    args: ['--no-sandbox', '--disable-setuid-sandbox'],
  });

  try {
    // Creating the page inside the try block guarantees the browser is
    // closed by `finally` even when newPage() itself fails.
    const page = await browser.newPage();
    await page.goto(url, { waitUntil: 'networkidle2' });

    // Extract data (this callback is evaluated inside the page)
    const data = await page.evaluate(() => {
      return {
        title: document.title,
        heading: document.querySelector('h1')?.textContent,
        links: Array.from(document.querySelectorAll('a')).map((a) => a.href).slice(0, 10),
      };
    });

    console.log('Scraped data:', data);
    return data;
  } catch (error) {
    console.error('Scraping error:', error);
  } finally {
    await browser.close();
  }
}
// scrape() can still reject (for example when Chrome fails to launch), so
// handle the rejection explicitly and signal failure through the exit code.
scrape('https://example.com').catch((error) => {
  console.error('Fatal error:', error);
  process.exitCode = 1;
});
PDF Generation Example
const puppeteer = require ( 'puppeteer' );
/**
 * Renders `url` in headless Chrome and writes it to `outputPath` as an A4 PDF
 * with background graphics included.
 *
 * @param {string} url - Address of the page to render.
 * @param {string} outputPath - File path the PDF is written to.
 * @returns {Promise<void>} Resolves once the PDF is written and the browser
 *   is closed. Rejects with the underlying error if launching, navigating or
 *   rendering fails; the browser is closed in every case after launch.
 */
async function generatePDF(url, outputPath) {
  const browser = await puppeteer.launch({
    headless: 'new',
    args: ['--no-sandbox', '--disable-setuid-sandbox'],
  });

  // try/finally (no catch): errors still propagate to the caller, but the
  // Chrome process is never left running after a failure.
  try {
    const page = await browser.newPage();
    await page.goto(url, { waitUntil: 'networkidle2' });
    await page.pdf({
      path: outputPath,
      format: 'A4',
      printBackground: true,
    });
    console.log('PDF saved to', outputPath);
  } finally {
    await browser.close();
  }
}
// generatePDF() rejects when launching, navigating or rendering fails; report
// that and signal failure through the exit code.
generatePDF('https://example.com', 'page.pdf').catch((error) => {
  console.error('Fatal error:', error);
  process.exitCode = 1;
});
Deploying
Prepare Your Files
Ensure you have:
index.js (or your script)
package.json
.shardcloud
Exclude Unnecessary Files
Remove: node_modules/, package-lock.json
Create ZIP Archive
Compress your project folder.
Accessing Output Files
Screenshots and PDFs saved by your script can be downloaded from Shard Cloud’s file manager in the dashboard.
Additional Resources
Troubleshooting
Chrome crashed or failed to launch
Ensure headless mode is enabled
Add all required flags (--no-sandbox, --disable-setuid-sandbox, --disable-dev-shm-usage)
Increase memory allocation to at least 1024MB
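If navigation times out, increase the timeout in page.goto() options (for example, page.goto(url, { timeout: 60000 }); the default is 30 seconds)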
Use waitUntil: 'networkidle2' for dynamic pages
If the app runs out of memory, close browser instances after each task
Increase MEMORY in your config
Avoid keeping multiple pages open