adding test + refacto

This commit is contained in:
koudo
2022-01-07 23:44:28 +01:00
parent 8ef6d168c8
commit adbd632db0
27 changed files with 4701 additions and 160 deletions
+12
View File
@@ -0,0 +1,12 @@
import * as phantom from "../phantom";
// Replace process.exit with a no-op so that importing the entry point can never
// terminate the Jest worker.
process.exit = (() => undefined) as unknown as () => never;

describe("main", () => {
  it("should be launched at import", async () => {
    // Stub the phantom: this test only checks that the entry point triggers it.
    const marmitonPhantomMock = jest
      .spyOn(phantom, "marmitonPhantom")
      .mockResolvedValue(undefined as never);

    await import("../index");

    // The matcher is synchronous, so there is nothing to await here.
    expect(marmitonPhantomMock).toBeCalledTimes(1);
  });
});
+53
View File
@@ -0,0 +1,53 @@
/**
* @jest-environment jsdom
*/
import { scrapResultFromLink } from "../parsing";
/**
 * Build a DOM element from an HTML snippet.
 *
 * The snippet is trimmed and parsed inside a throw-away `<div>`; only the
 * first top-level element of the snippet is returned.
 *
 * @param htmlString HTML markup to parse
 * @returns the first element produced by the markup
 */
function createElementFromHTML(htmlString: string): Element {
  const wrapper = document.createElement("div");
  wrapper.innerHTML = htmlString.trim();
  // Return wrapper.childNodes instead to support several top-level nodes.
  const { firstElementChild } = wrapper;
  return firstElementChild as Element;
}
describe("scrapResultFromLink", () => {
  it("should return correct values", () => {
    const url = "url";

    // One snippet per scenario, in the same order as the expectations below.
    const snippets = [
      // link with a title and a rating line
      `<a href="/test1"><h4>name 1</h4><div>3.5/6 (546 com)</div></a>`,
      // link with a title only
      `<a href="/test1"><h4>name 1</h4></a>`,
      // no link at all: the helper keeps only the first element (the <h4>)
      `<h4>name 1</h4><div>3.5/6 (546 com)</div>`,
      // link whose rating block is empty
      `<a href="/test1"><h4>tes</h4><div></div></a>`,
    ];
    const links = snippets.map((snippet) => createElementFromHTML(snippet));

    const expected = [
      { name: "name 1", url: `${url}/test1`, commentCount: 546, score: 3.5 },
      { name: "name 1", url: `${url}/test1`, commentCount: 0, score: 0 },
      { name: "", url: `${url}`, commentCount: 0, score: 0 },
      { name: "tes", url: `${url}/test1`, commentCount: 0, score: 0 },
    ];

    expect(scrapResultFromLink(links, url)).toEqual(expected);
  });
});
+77
View File
@@ -0,0 +1,77 @@
import { marmitonPhantom } from "../phantom";
import * as validate from "../validate";
import * as search from "../search";
import { buster } from "../../utils/buster";
// const processExitMock = jest.spyOn(process, "exit");
// Replace process.exit with a Jest mock: the code under test calls it, and the
// tests below assert on those calls instead of letting the process terminate.
process.exit = jest.fn() as unknown as () => never;
// Silence console.log for the whole test file.
console.log = () => undefined;
describe("marmitonPhantom", () => {
  // Start every test with an empty call history on the process.exit mock, so
  // that an exit recorded by an earlier test cannot satisfy a later assertion.
  beforeEach(() => {
    (process.exit as unknown as jest.Mock).mockClear();
  });

  it("should check argument", async () => {
    const validateMarmitonSearchInputMock = jest.spyOn(
      validate,
      "validateMarmitonSearchInput"
    );
    await marmitonPhantom();
    expect(validateMarmitonSearchInputMock).toBeCalledWith(buster.argument);
  });

  it("should launch search", async () => {
    const searchMock = jest.spyOn(search, "search");
    jest.spyOn(validate, "validateMarmitonSearchInput").mockReturnValue(true);
    await marmitonPhantom();
    const { query, ...opts } = buster.argument as MarmitonSearchInput;
    expect(searchMock).toBeCalledWith(query, opts);
  });

  it("should not launch search if validateMarmitonSearchInput return false", async () => {
    const searchMock = jest.spyOn(search, "search");
    jest.spyOn(validate, "validateMarmitonSearchInput").mockReturnValue(false);
    await marmitonPhantom();
    expect(searchMock).not.toBeCalled();
  });

  it("should set result in buster.setResultObject", async () => {
    const result: MarmitonSearchResult[] = [
      {
        commentCount: 0,
        name: "name",
        score: 0,
        url: "url",
      },
    ];
    jest
      .spyOn(validate, "validateMarmitonSearchInput")
      .mockReturnValueOnce(true);
    const setResultObjectMock = jest.spyOn(buster, "setResultObject");
    const searchMock = jest.spyOn(search, "search");
    searchMock.mockResolvedValueOnce(result);
    await marmitonPhantom();
    expect(setResultObjectMock).toBeCalledWith(result);
  });

  it("should call process.exit", async () => {
    await marmitonPhantom();
    expect(process.exit).toBeCalled();
  });

  it("should call process.exit even if search throw", async () => {
    // The mocks must be installed BEFORE the call, and validation must pass,
    // otherwise the failing search is never reached.
    jest
      .spyOn(validate, "validateMarmitonSearchInput")
      .mockReturnValueOnce(true);
    const searchMock = jest
      .spyOn(search, "search")
      .mockRejectedValueOnce(new Error("error"));
    await marmitonPhantom();
    expect(searchMock).toBeCalled();
    expect(process.exit).toBeCalled();
  });

  it("should call process.exit even if setResultObject throw", async () => {
    // Validation passes and search succeeds, so the only failure comes from
    // storing the result.
    jest
      .spyOn(validate, "validateMarmitonSearchInput")
      .mockReturnValueOnce(true);
    jest.spyOn(search, "search").mockResolvedValueOnce([]);
    const setResultObjectMock = jest
      .spyOn(buster, "setResultObject")
      .mockRejectedValueOnce(new Error("error"));
    await marmitonPhantom();
    expect(setResultObjectMock).toBeCalled();
    expect(process.exit).toBeCalled();
  });
});
+114
View File
@@ -0,0 +1,114 @@
import { validateMarmitonSearchInput } from "../validate";
// Silence the diagnostics the validator prints when it rejects an input.
console.log = () => undefined;

describe("validateMarmitonSearchInput", () => {
  /** Assert that the validator accepts every candidate, in order. */
  const expectAccepted = (...candidates: unknown[]): void => {
    for (const candidate of candidates) {
      expect(validateMarmitonSearchInput(candidate)).toBeTruthy();
    }
  };

  /** Assert that the validator rejects every candidate, in order. */
  const expectRejected = (...candidates: unknown[]): void => {
    for (const candidate of candidates) {
      expect(validateMarmitonSearchInput(candidate)).toBeFalsy();
    }
  };

  it("should return true if argument contains query", () => {
    expectAccepted({ query: "query" });
  });

  it("should return true if argument contains dt", () => {
    expectAccepted(
      { query: "query", dt: "entree" },
      { query: "query", dt: ["entree", "dessert"] }
    );
  });

  it("should return true if argument contains page", () => {
    expectAccepted({ query: "query", page: 1 }, { query: "query", page: 32 });
  });

  it("should return true if argument contains type", () => {
    expectAccepted(
      { query: "query", type: "season" },
      { query: "query", type: ["season", "recipe"] }
    );
  });

  it("should return true if argument contains all options", () => {
    expectAccepted(
      { query: "query", type: "recipe", dt: "entree", page: 1 },
      {
        query: "query",
        type: ["season", "recipe"],
        dt: ["entree", "dessert"],
        page: 54,
      }
    );
  });

  it("should return false if argument does not contains query", () => {
    expectRejected({});
  });

  it("should return false if argument contains wrong dt", () => {
    expectRejected(
      { query: "query", dt: 1 },
      { query: "query", dt: "1" },
      { query: "query", dt: false },
      { query: "query", dt: null }
    );
  });

  it("should return false if argument contains wrong type", () => {
    expectRejected(
      { query: "query", type: 1 },
      { query: "query", type: "1" },
      { query: "query", type: false }
    );
  });

  it("should return false if argument contains wrong page", () => {
    expectRejected(
      { query: "query", page: -1 },
      { query: "query", page: "-1" },
      { query: "query", page: false }
    );
  });

  it("should return false if argument contains unknown property", () => {
    expectRejected({ query: "query", test: -1 });
  });
});
+10
View File
@@ -0,0 +1,10 @@
// NOTE(review): the bare string statements below look like directives read by
// the Phantombuster platform (command and package version) — confirm against
// its documentation before editing, moving or reformatting them.
/* eslint-disable */
// Phantombuster configuration {
"phantombuster command: nodejs"
"phantombuster package: 5"
// }
/* eslint-enable */
import { marmitonPhantom } from "./phantom";
// Entry point: the phantom is started as soon as this module is loaded.
marmitonPhantom();
+78
View File
@@ -0,0 +1,78 @@
{
"$ref": "#/definitions/MarmitonSearchInput",
"$schema": "http://json-schema.org/draft-07/schema#",
"definitions": {
"MarmitonSearchInput": {
"additionalProperties": false,
"properties": {
"dt": {
"anyOf": [
{
"enum": [
"entree",
"platprincipal",
"dessert",
"amusegueule",
"accompagnement",
"sauce",
"boisson",
"confiserie",
"conseil"
],
"type": "string"
},
{
"items": {
"enum": [
"entree",
"platprincipal",
"dessert",
"amusegueule",
"accompagnement",
"sauce",
"boisson",
"confiserie",
"conseil"
],
"type": "string"
},
"type": "array"
}
]
},
"page": {
"type": "integer",
"minimum": 0
},
"query": {
"type": "string"
},
"type": {
"anyOf": [
{
"enum": [
"season",
"recipe"
],
"type": "string"
},
{
"items": {
"enum": [
"season",
"recipe"
],
"type": "string"
},
"type": "array"
}
]
}
},
"required": [
"query"
],
"type": "object"
}
}
}
+33
View File
@@ -0,0 +1,33 @@
{
"$ref": "#/definitions/MarmitonSearchOutput",
"$schema": "http://json-schema.org/draft-07/schema#",
"definitions": {
"MarmitonSearchOutput": {
"items": {
"additionalProperties": false,
"properties": {
"commentCount": {
"type": "number"
},
"name": {
"type": "string"
},
"score": {
"type": "number"
},
"url": {
"type": "string"
}
},
"required": [
"name",
"url",
"score",
"commentCount"
],
"type": "object"
},
"type": "array"
}
}
}
+40
View File
@@ -0,0 +1,40 @@
/**
 * Extract recipe data from the links of a marmiton search page.
 *
 * For every element the name is read from its first `<h4>`, and the score and
 * comment count from the text of the last child of that title's parent, which
 * is expected to look like `"3.5/6 (546 com)"`. Anything missing or
 * unparsable falls back to an empty name or `0`.
 *
 * This function is handed to puppeteer's `page.$$eval`, so it must stay
 * self-contained: do not reference module-level helpers from its body.
 *
 * @param links scraped `<a>` DOM elements representing recipes in marmiton
 * @param urlStart url used to prefix the recipe links
 * @returns name, url, score and comment count of each recipe
 */
export const scrapResultFromLink = (
  links: Element[],
  urlStart: string
): MarmitonSearchResult[] =>
  links.map((element) => {
    const titleElem = element.querySelector("h4");
    let name = "";
    let commentCount = 0;
    let score = 0;
    if (titleElem) {
      name = titleElem.textContent ?? "";
      // Optional chaining all the way down: a missing parent or sibling yields
      // an empty rating string instead of a TypeError.
      const bottomString =
        titleElem.parentElement?.lastElementChild?.textContent ?? "";
      const [scorePart, commentString] = bottomString.split("(");
      // Without a "(" there is no rating block: keep the zero defaults.
      if (commentString !== undefined) {
        score = Number.parseFloat(scorePart.split("/")[0]);
        commentCount = Number.parseInt(commentString.split(" ")[0], 10);
      }
    }
    return {
      name,
      url: `${urlStart}${element.getAttribute("href") ?? ""}`,
      score: Number.isNaN(score) ? 0 : score,
      commentCount: Number.isNaN(commentCount) ? 0 : commentCount,
    };
  });
+22
View File
@@ -0,0 +1,22 @@
import { buster } from "../utils/buster";
import { search } from "./search";
import { validateMarmitonSearchInput } from "./validate";
/**
 * Main function to launch from the phantom.
 *
 * Validates `buster.argument`, runs the marmiton search with it and stores the
 * results through `buster.setResultObject`. A failure of the search or of the
 * storage is only logged. `process.exit()` is called in every case.
 */
export const marmitonPhantom = async () => {
  if (!validateMarmitonSearchInput(buster.argument)) {
    // Invalid input: nothing to search for.
    return process.exit();
  }

  const { query, ...filters } = buster.argument;
  try {
    const recipes = await search(query, filters);
    await buster.setResultObject(recipes);
  } catch (error) {
    console.log("could not get result from marmiton:", error);
  }

  process.exit();
};
+49
View File
@@ -0,0 +1,49 @@
import { scrapResultFromLink } from "./parsing";
import { optionsToUrlParams, withPageOpened } from "../utils/browser";
/**
 * base url of the marmiton website
 */
const MARMITON_URL = "https://www.marmiton.org";
/**
 * url of the marmiton search form
 */
const SEARCH_URL = `${MARMITON_URL}/recettes/recherche.aspx`;
/**
 * selector to get the search item list
 */
const LINK_SELECTOR = "div>a[href^='/recettes/recette']";
/**
 * options that may be written into the search url:
 * the filters of SearchOptions plus "aqt", which carries the searched words
 */
const AVAILABLE_OPTIONS: (keyof SearchOptions | "aqt")[] = [
"type",
"dt",
"page",
"aqt",
];
/**
 * Scrape the result list of a marmiton search.
 *
 * The searched words are sent as the `aqt` url parameter next to the optional
 * filters; the resulting page is opened and its recipe links are parsed.
 *
 * @param search words to search for on marmiton
 * @param opts options used to filter the results
 * @returns the marmiton search results
 */
export const search = (
  search: string,
  opts?: SearchOptions
): Promise<MarmitonSearchResult[]> => {
  const params = { ...opts, aqt: search };
  const queryString = optionsToUrlParams(params, AVAILABLE_OPTIONS);
  const targetUrl = `${SEARCH_URL}?${queryString}`;

  return withPageOpened(async (page) => {
    await page.goto(targetUrl);
    return page.$$eval(LINK_SELECTOR, scrapResultFromLink, MARMITON_URL);
  });
};
+9 -8
View File
@@ -1,13 +1,15 @@
declare type MarmitonSearchResult = {
declare type MarmitonSearchOutput = {
name: string;
url: string;
score: number;
commentCount: number;
};
}[];
declare type SearchRecipeType = "season" | "recipe";
declare type MarmitonSearchResult = MarmitonSearchOutput[number];
declare type SearchDishType =
type SearchRecipeType = "season" | "recipe";
type SearchDishType =
| "entree"
| "platprincipal"
| "dessert"
@@ -18,12 +20,11 @@ declare type SearchDishType =
| "confiserie"
| "conseil";
declare type SearchOptions = {
declare type MarmitonSearchInput = {
query: string;
type?: SearchRecipeType | SearchRecipeType[];
dt?: SearchDishType | SearchDishType[];
page?: number;
};
declare type MarmitonSearchInput = {
query: string;
} & SearchOptions;
declare type SearchOptions = Omit<MarmitonSearchInput, "query">;
+29
View File
@@ -0,0 +1,29 @@
import validator from "is-my-json-valid";
// JSON schema loaded from input.json; it is the reference used to validate a
// MarmitonSearchInput.
export const jsonSchema = require("./input.json");
// Validator function built once from the schema by is-my-json-valid.
const MarmitonSearchInputValidator = validator(jsonSchema);
/**
 * Check a value against the json schema defined in the input.json file.
 *
 * When the value is rejected, the received value and the expected schema
 * properties are printed with console.log.
 *
 * @param value value to check
 * @returns true if the value is valid, false otherwise
 */
export function validateMarmitonSearchInput(
  value: unknown
): value is MarmitonSearchInput {
  if (!MarmitonSearchInputValidator(value)) {
    const received = JSON.stringify(value, null, 2);
    console.log(`received input : ${received}`);

    const expected = JSON.stringify(
      jsonSchema.definitions.MarmitonSearchInput.properties,
      null,
      2
    );
    console.log(`input schema : ${expected}`);

    return false;
  }
  return true;
}
-96
View File
@@ -1,96 +0,0 @@
// NOTE(review): the bare string statements below look like directives read by
// the Phantombuster platform (command and package version) — confirm against
// its documentation before editing, moving or reformatting them.
/* eslint-disable */
// Phantombuster configuration {
"phantombuster command: nodejs"
"phantombuster package: 6"
// }
/* eslint-enable */
import Buster from "phantombuster";
import puppeteer from "puppeteer";
// Base url of the marmiton website, used for the search page and to prefix
// the scraped recipe links.
const MARMITON_URL = "https://www.marmiton.org";
// Phantombuster client used to read the input argument and store the result.
const buster = new Buster();
const optionsToUrlParams = (
opts: Record<string, unknown | unknown[]>
): string => {
return Object.keys(opts)
.map((key) => {
const value = opts[key];
if (Array.isArray(value)) {
return value.map((val) => `${key}=${val}`).join("&");
} else {
return `${key}=${value}`;
}
})
.join("&");
};
export const search = async (
search: string,
opts?: SearchOptions
): Promise<MarmitonSearchResult[]> => {
const browser = await puppeteer.launch({
args: ["--no-sandbox"],
});
const page = await browser.newPage();
const argsString = optionsToUrlParams({
...opts,
aqt: search,
});
await page.goto(`${MARMITON_URL}/recettes/recherche.aspx?${argsString}`);
const list = await page.$$eval(
"div>a[href^='/recettes/recette']",
(links: Element[], urlStart: string) =>
links.map((element) => {
const titleElem = element.querySelector("h4");
const bottomString =
titleElem?.parentElement?.lastElementChild?.textContent ?? "";
const [scorePart, commentString] = bottomString.split("(");
return {
name: titleElem?.innerText ?? "",
url: `${urlStart}${element.getAttribute("href")}` ?? "",
score: Number.parseFloat(scorePart.split("/")[0]),
commentCount: Number.parseInt(commentString.split(" ")[0]),
};
}),
MARMITON_URL
);
const results = list;
await page.close();
await browser.close();
return results;
};
const main = async () => {
const { query, ...opts } = buster.argument as MarmitonSearchInput;
let results: unknown[] = [];
try {
results = await search(query ?? "crepe", opts);
} catch (err) {
console.log("could not get result from marmiton:", err);
}
try {
await buster.setResultObject(results);
} catch (err) {
console.log("Could not set the result object:", err);
}
process.exit();
};
main();
+99
View File
@@ -0,0 +1,99 @@
import {
optionsToUrlParams,
withBrowserOpened,
withPageOpened,
} from "../browser";
// Spies recording the calls to page.close() and browser.close().
const pageCloseMock = jest.fn();
const browserCloseMock = jest.fn();
// Minimal stand-in for a puppeteer page: only close() is provided.
const pageMock = {
close: pageCloseMock,
};
// Minimal stand-in for a puppeteer browser: it always hands out pageMock.
const browserMock = {
newPage: () => Promise.resolve(pageMock),
close: browserCloseMock,
};
// Replace puppeteer so that launch() resolves with browserMock.
// browserMock is only read when launch() is called, not when the factory runs.
jest.mock("puppeteer", () => {
return {
launch: () => Promise.resolve(browserMock),
};
});
describe("withPageOpened", () => {
  it("should launch argument function once with page as argument", async () => {
    const callback = jest.fn();

    await withPageOpened(callback);

    expect(callback).toHaveBeenCalledTimes(1);
    expect(callback).toHaveBeenCalledWith(pageMock);
  });

  it("should throw on error if argument function throws", async () => {
    const failure = new Error("test error");
    const failingCallback = jest.fn().mockRejectedValueOnce(failure);
    const run = () => withPageOpened(failingCallback);

    await expect(run).rejects.toThrow(failure);
  });

  it("should close page before resolve", async () => {
    await withPageOpened(async () => undefined);

    expect(pageCloseMock).toHaveBeenCalledTimes(1);
  });
});
describe("withBrowserOpen", () => {
  it("should launch argument function with browser as argument", async () => {
    const callback = jest.fn();

    await withBrowserOpened(callback);

    expect(callback).toHaveBeenCalledTimes(1);
    expect(callback).toHaveBeenCalledWith(browserMock);
  });

  it("should close browser before resolve", async () => {
    await withBrowserOpened(async () => undefined);

    expect(browserCloseMock).toHaveBeenCalledTimes(1);
  });

  it("should throw on error if argument function throws", async () => {
    const failure = new Error("test error");
    const failingCallback = jest.fn().mockRejectedValueOnce(failure);
    const run = () => withBrowserOpened(failingCallback);

    await expect(run).rejects.toThrow(failure);
  });
});
// optionsToUrlParams is synchronous, so none of these tests needs to be async.
describe("optionsToUrlParams", () => {
  it("should return correct string", () => {
    expect(
      optionsToUrlParams({ test: 1, truc: "2", machin: true }, [
        "test",
        "truc",
        "machin",
      ])
    ).toEqual("test=1&truc=2&machin=true");
  });

  it("should hide value against the second param", () => {
    expect(
      optionsToUrlParams({ test: 1, truc: "2", machin: true }, ["test", "truc"])
    ).toEqual("test=1&truc=2");
  });

  it("should repeat value if it is an array", () => {
    expect(optionsToUrlParams({ truc: ["2", "4", "5"] }, ["truc"])).toEqual(
      "truc=2&truc=4&truc=5"
    );
  });

  it("should ignore whitelisted keys that are missing from the object", () => {
    expect(
      optionsToUrlParams({ test: 1, truc: "2", machin: true }, [
        // The cast sneaks in a key the object does not have.
        "bidule" as "machin",
        "truc",
      ])
    ).toEqual("truc=2");
  });
});
+67
View File
@@ -0,0 +1,67 @@
import puppeteer from "puppeteer";
/**
 * Open a browser with puppeteer and call the given function with it.
 *
 * The browser is always closed once `func` has settled, whether it resolved
 * or rejected, so a failing callback cannot leave a browser process behind.
 *
 * @param func function to execute after opening the browser and before closing it
 * @returns the value `func` resolved with; a rejection of `func` is propagated
 */
export const withBrowserOpened = async <T>(
  func: (browser: puppeteer.Browser) => Promise<T>
): Promise<T> => {
  const browser = await puppeteer.launch({
    args: ["--no-sandbox"],
  });
  try {
    return await func(browser);
  } finally {
    await browser.close();
  }
};
/**
 * Open a page with puppeteer and call the given function with it.
 *
 * A browser is opened through `withBrowserOpened` for the duration of the
 * call. The page is always closed once `func` has settled, whether it
 * resolved or rejected.
 *
 * @param func function to execute after opening a page and before closing it
 * @returns the value `func` resolved with; a rejection of `func` is propagated
 */
export const withPageOpened = async <T>(
  func: (page: puppeteer.Page) => Promise<T>
): Promise<T> => {
  return await withBrowserOpened(async (browser) => {
    const page = await browser.newPage();
    try {
      return await func(page);
    } finally {
      await page.close();
    }
  });
};
/**
 * Transform an object into url parameters.
 *
 * Only the whitelisted keys are written, in whitelist order. An array value
 * writes the same parameter name once per element. `undefined` and `null`
 * values are skipped; other falsy values (`0`, `false`, `""`) are kept, so a
 * valid `page: 0` is not silently dropped. Values are url-encoded.
 *
 * @param opts object to transform
 * @param whitelist keys to write from the object
 * @returns the query string, without leading `?`
 */
export const optionsToUrlParams = <T, V extends keyof T>(
  opts: T,
  whitelist: V[]
): string => {
  const pairs: string[] = [];
  for (const key of whitelist) {
    const value: unknown = opts[key];
    // A scalar is handled as a one-element list so both cases share one path.
    const values: unknown[] = Array.isArray(value) ? value : [value];
    for (const item of values) {
      if (item === undefined || item === null) continue;
      pairs.push(`${String(key)}=${encodeURIComponent(String(item))}`);
    }
  }
  return pairs.join("&");
};
+6
View File
@@ -0,0 +1,6 @@
import Buster from "phantombuster";
/**
 * phantombuster client, created once and shared through this module
 */
export const buster = new Buster();