Indeed

URL: https://www.indeed.com

Export all job ads returned by an Indeed search to a CSV file

Description

Exports the following information:
- Ad Title
- Location
- Summary and full description
- Salary
- Type of employment
- Company Name
- Link to the announcement

Initial script (INITIAL)

// User-editable settings. Each assignment carries a //WIZ_* annotation on the
// same line; these annotations label the settings for the script wizard.

//WIZ_TITLE Indeed
jobTerm = "senior IT manager"; //WIZ_VARIABLE #name:The title of the job you want to search for
where = "London"; //WIZ_VARIABLE #name:Where is this job located (within 25km)
// The country code is used as the sub-domain of indeed.com (e.g. "uk" -> uk.indeed.com).
country = "uk"; //WIZ_VARIABLE #name:Country #code:["fr","uk","de","it","es","nl"]
nbResults = 50; //WIZ_VARIABLE #name:Number of ads to browse


//WIZ_TITLE The CSV
//WIZ_COMMENT While the script is running, never open the CSV directly. It would become read-only, would be inaccessible, and the script would crash. Instead, make a copy and open the copy to check.
pathDir = ''; //WIZ_VARIABLE #name:Path of the directory where the CSV must be written (leave blank to choose your Desktop)
nameCSV = 'my_export'; //WIZ_VARIABLE #name:Name of the CSV file
deleteCSVstart = true; //WIZ_VARIABLE #name:Remove the CSV on startup if a file already exists

//WIZ_TITLE Indeed cookie
//WIZ_COMMENT Indeed can sometimes decide to block you if you make too many requests. In this case, go to their site, solve the captcha, then paste your cookie below. For example, open the Developer Tools Network tab in Firefox, pick any request sent to Indeed and copy the value of its Cookie field.
cookie = ''; //WIZ_VARIABLE #name:Indeed cookie if needed

// Resolve the output file: fall back to the Desktop when no directory was configured.
if(!pathDir) pathDir=path("desktop")
// The directory and file name are joined by plain concatenation below, so make
// sure the directory ends with a separator (a user-typed path may not).
if(!pathDir.endsWith("/") && !pathDir.endsWith("\\")) pathDir+="/"
pathCSV=pathDir+nameCSV+".csv"

// Start from a fresh file when requested, so rows from a previous run are not kept.
if(deleteCSVstart) delete(pathCSV)

// The cookie is optional ("if needed" in the wizard): only install it when one was provided.
if(cookie) setCookieValue(cookie, "https://"+country+".indeed.com/")

// Number of ads written to the CSV so far.
iResult=0

// Walk the search result pages and export one CSV row per job card.
// Pages are requested 10 results apart through the "start" query parameter,
// hence the page count derived from nbResults/10.
baseUrl="https://"+country+".indeed.com"
nbPages=ceil(nbResults/10)
for(iPage=0;iPage<nbPages;iPage++)
{
	// Stop requesting pages once the requested number of ads has been exported.
	if(iResult>=nbResults) break

	htmlResult=getPage(baseUrl+"/jobs?q="+urlEncode(jobTerm)+"&l="+urlEncode(where)+"&start="+(iPage*10))
	jobs=selectAll(".jobsearch-SerpJobCard",htmlResult,"outerHTML")

	// No job card on this page (end of results, or a blocked request): further pages are pointless.
	if(!jobs) break

	for(job in jobs)
	{
		// ">=" (not ">"): iResult counts rows already written, so reaching nbResults means we are done.
		if(iResult>=nbResults) break

		// Fields available directly on the search result card.
		title=cleanSelect("h2", job)
		company=cleanSelect(".company", job)
		location=cleanSelect(".location", job)
		summary=cleanSelect(".summary", job)
		link=cleanSelect("a.jobtitle",job,"href")

		// Fields that require the ad's own page. Without a link there is nothing
		// to fetch, so these stay empty instead of being scraped from a wrong page.
		info1=""
		info2=""
		info3=""
		description=""
		fullLink=""
		if(link)
		{
			fullLink=baseUrl+link
			htmlJob=getPage(fullLink)

			// The first three metadata spans of the ad header; which one holds the salary
			// or the employment type is not fixed, so they are exported as generic columns.
			metaSelector=".jobsearch-DesktopStickyContainer .jobsearch-JobMetadataHeader-item span:nth-child("
			info1=cleanSelect(metaSelector+"1)", htmlJob)
			info2=cleanSelect(metaSelector+"2)", htmlJob)
			info3=cleanSelect(metaSelector+"3)", htmlJob)

			// NOTE(review): the meaning of the 4th argument ("description") is not visible here; kept as-is.
			description=cleanSelect("#jobDescriptionText", htmlJob,null,"description")
		}

		// Append the row; column names are the map keys.
		csv(pathCSV,[
			"Title":title,
			"Location":location,
			"Summary":summary,
			"Info 1":info1,
			"Info 2":info2,
			"Info 3":info3,
			"Company":company,
			"description":description,
			"Link":fullLink,
		])

		iResult++
	}
}

//MATRICULE 5D99