Parse provider API URLs + use new provider API in runAll scrape

This commit is contained in:
mrjvs 2023-12-18 21:47:19 +01:00
parent de30929dd6
commit a52fac701a
3 changed files with 249 additions and 92 deletions

View file

@ -1,7 +1,11 @@
import { ScrapeMedia } from "@movie-web/providers"; import {
FullScraperEvents,
RunOutput,
ScrapeMedia,
} from "@movie-web/providers";
import { RefObject, useCallback, useEffect, useRef, useState } from "react"; import { RefObject, useCallback, useEffect, useRef, useState } from "react";
import { providers } from "@/utils/providers"; import { getLoadbalancedProviderApiUrl, providers } from "@/utils/providers";
export interface ScrapingItems { export interface ScrapingItems {
id: string; id: string;
@ -18,20 +22,17 @@ export interface ScrapingSegment {
percentage: number; percentage: number;
} }
export function useScrape() { type ScraperEvent<Event extends keyof FullScraperEvents> = Parameters<
NonNullable<FullScraperEvents[Event]>
>[0];
function useBaseScrape() {
const [sources, setSources] = useState<Record<string, ScrapingSegment>>({}); const [sources, setSources] = useState<Record<string, ScrapingSegment>>({});
const [sourceOrder, setSourceOrder] = useState<ScrapingItems[]>([]); const [sourceOrder, setSourceOrder] = useState<ScrapingItems[]>([]);
const [currentSource, setCurrentSource] = useState<string>(); const [currentSource, setCurrentSource] = useState<string>();
const lastId = useRef<string | null>(null);
const startScraping = useCallback( const initEvent = useCallback((evt: ScraperEvent<"init">) => {
async (media: ScrapeMedia) => {
if (!providers) return null;
let lastId: string | null = null;
const output = await providers.runAll({
media,
events: {
init(evt) {
setSources( setSources(
evt.sourceIds evt.sourceIds
.map((v) => { .map((v) => {
@ -51,16 +52,18 @@ export function useScrape() {
}, {}) }, {})
); );
setSourceOrder(evt.sourceIds.map((v) => ({ id: v, children: [] }))); setSourceOrder(evt.sourceIds.map((v) => ({ id: v, children: [] })));
}, }, []);
start(id) {
const startEvent = useCallback((id: ScraperEvent<"start">) => {
setSources((s) => { setSources((s) => {
if (s[id]) s[id].status = "pending"; if (s[id]) s[id].status = "pending";
return { ...s }; return { ...s };
}); });
setCurrentSource(id); setCurrentSource(id);
lastId = id; lastId.current = id;
}, }, []);
update(evt) {
const updateEvent = useCallback((evt: ScraperEvent<"update">) => {
setSources((s) => { setSources((s) => {
if (s[evt.id]) { if (s[evt.id]) {
s[evt.id].status = evt.status; s[evt.id].status = evt.status;
@ -70,8 +73,10 @@ export function useScrape() {
} }
return { ...s }; return { ...s };
}); });
}, }, []);
discoverEmbeds(evt) {
const discoverEmbedsEvent = useCallback(
(evt: ScraperEvent<"discoverEmbeds">) => {
setSources((s) => { setSources((s) => {
evt.embeds.forEach((v) => { evt.embeds.forEach((v) => {
const source = providers.getMetadata(v.embedScraperId); const source = providers.getMetadata(v.embedScraperId);
@ -94,20 +99,92 @@ export function useScrape() {
return [...s]; return [...s];
}); });
}, },
}, []
}); );
if (output && lastId) { const startScrape = useCallback(() => {
lastId.current = null;
}, []);
const getResult = useCallback((output: RunOutput | null) => {
if (output && lastId.current) {
setSources((s) => { setSources((s) => {
if (!lastId) return s; if (!lastId.current) return s;
if (s[lastId]) s[lastId].status = "success"; if (s[lastId.current]) s[lastId.current].status = "success";
return { ...s }; return { ...s };
}); });
} }
return output; return output;
}, []);
return {
initEvent,
startEvent,
updateEvent,
discoverEmbedsEvent,
startScrape,
getResult,
sources,
sourceOrder,
currentSource,
};
}
export function useScrape() {
const {
sources,
sourceOrder,
currentSource,
updateEvent,
discoverEmbedsEvent,
initEvent,
getResult,
startEvent,
startScrape,
} = useBaseScrape();
const startScraping = useCallback(
async (media: ScrapeMedia) => {
const providerApiUrl = getLoadbalancedProviderApiUrl();
if (providerApiUrl) {
startScrape();
const sseOutput = await new Promise<RunOutput | null>(
(resolve, reject) => {
const scrapeEvents = new EventSource(providerApiUrl);
scrapeEvents.addEventListener("error", (err) => reject(err));
scrapeEvents.addEventListener("init", (e) => initEvent(e.data));
scrapeEvents.addEventListener("start", (e) => startEvent(e.data));
scrapeEvents.addEventListener("update", (e) => updateEvent(e.data));
scrapeEvents.addEventListener("discoverEmbeds", (e) =>
discoverEmbedsEvent(e.data)
);
scrapeEvents.addEventListener("finish", (e) => resolve(e.data));
}
);
return getResult(sseOutput);
}
if (!providers) return null;
startScrape();
const output = await providers.runAll({
media,
events: {
init: initEvent,
start: startEvent,
update: updateEvent,
discoverEmbeds: discoverEmbedsEvent,
}, },
[setSourceOrder, setSources] });
return getResult(output);
},
[
initEvent,
startEvent,
updateEvent,
discoverEmbedsEvent,
getResult,
startScrape,
]
); );
return { return {

View file

@ -7,22 +7,25 @@ import {
targets, targets,
} from "@movie-web/providers"; } from "@movie-web/providers";
import { conf } from "@/setup/config"; import { getProviderApiUrls, getProxyUrls } from "@/utils/proxyUrls";
import { useAuthStore } from "@/stores/auth";
const originalUrls = conf().PROXY_URLS; function makeLoadbalancedList(getter: () => string[]) {
let fetchersIndex = -1; let listIndex = -1;
return () => {
export function getLoadbalancedProxyUrl() { const fetchers = getter();
const fetchers = useAuthStore.getState().proxySet ?? originalUrls; if (listIndex === -1 || listIndex >= fetchers.length) {
if (fetchersIndex === -1 || fetchersIndex >= fetchers.length) { listIndex = Math.floor(Math.random() * fetchers.length);
fetchersIndex = Math.floor(Math.random() * fetchers.length);
} }
const proxyUrl = fetchers[fetchersIndex]; const proxyUrl = fetchers[listIndex];
fetchersIndex = (fetchersIndex + 1) % fetchers.length; listIndex = (listIndex + 1) % fetchers.length;
return proxyUrl; return proxyUrl;
};
} }
const getLoadbalancedProxyUrl = makeLoadbalancedList(getProxyUrls);
export const getLoadbalancedProviderApiUrl =
makeLoadbalancedList(getProviderApiUrls);
function makeLoadBalancedSimpleProxyFetcher() { function makeLoadBalancedSimpleProxyFetcher() {
const fetcher: ProviderBuilderOptions["fetcher"] = (a, b) => { const fetcher: ProviderBuilderOptions["fetcher"] = (a, b) => {
const currentFetcher = makeSimpleProxyFetcher( const currentFetcher = makeSimpleProxyFetcher(

77
src/utils/proxyUrls.ts Normal file
View file

@ -0,0 +1,77 @@
import { conf } from "@/setup/config";
import { useAuthStore } from "@/stores/auth";
// URL list taken from the app config once, when this module is loaded.
// getParsedUrls() uses it as the fallback when the auth store has no proxySet.
const originalUrls = conf().PROXY_URLS;
// Every entry type the parser accepts; `as const` keeps the literal values
// so the union type below can be derived from this single list.
const types = ["proxy", "api"] as const;
// Union of the literals in `types`: "proxy" | "api".
type ParsedUrlType = (typeof types)[number];
/**
 * One configured URL entry after parsing: the URL itself plus its type.
 */
export interface ParsedUrl {
// The URL string (getParsedUrls only emits values that pass canParseUrl).
url: string;
// Entry type; filtered on by getProxyUrls() ("proxy") and getProviderApiUrls() ("api").
type: ParsedUrlType;
}
/**
 * Report whether `url` is accepted by the `URL` constructor.
 *
 * @param url - candidate string to validate
 * @returns `true` when `new URL(url)` succeeds, `false` when it throws
 */
function canParseUrl(url: string): boolean {
  let parsed: URL | undefined;
  try {
    parsed = new URL(url);
  } catch {
    // The constructor throws on input it cannot parse; treat that as "no".
    parsed = undefined;
  }
  return parsed !== undefined;
}
/**
 * Type guard: narrows an arbitrary string to `ParsedUrlType` when it is one
 * of the values listed in `types`.
 *
 * @param type - raw type string (e.g. taken from a parsed `type=` parameter)
 * @returns `true` when `type` is a known entry type
 */
function isParsedUrlType(type: string): type is ParsedUrlType {
  // Widen the readonly literal tuple to `readonly string[]` through an
  // annotation so `includes` accepts any string, without resorting to `as any`.
  const allowed: readonly string[] = types;
  return allowed.includes(type);
}
/**
 * Convert a comma-separated list of `key=value` pairs, such as
 * "a=b,c=d,d=e", into a plain dictionary object.
 *
 * Segments with a missing or empty key or value are ignored. Only the text
 * between the first and second "=" of a segment is used as the value.
 * When a key occurs more than once, the last occurrence wins.
 *
 * @param input - the raw parameter string
 * @returns an object mapping each key to its value
 */
function parseParams(input: string): Record<string, string> {
  const pairs: [string, string][] = [];
  for (const segment of input.split(",")) {
    const [key, value] = segment.split("=", 2);
    // Both halves must be present and non-empty for the pair to count.
    if (!key || !value) continue;
    pairs.push([key, value]);
  }
  return Object.fromEntries(pairs);
}
/**
 * Parse the configured URL list into typed entries.
 *
 * The list comes from the auth store's `proxySet`, falling back to
 * `originalUrls` when that is nullish. Two entry formats are accepted:
 *
 * - a plain URL, which is treated as type "proxy";
 * - `|key=value,...|URL`, where the `type` parameter selects the entry type
 *   (defaulting to "proxy" when no `type` parameter is given).
 *
 * Entries that match neither format, whose URL cannot be parsed, or whose
 * `type` is not a known `ParsedUrlType` are skipped.
 *
 * @returns the successfully parsed entries, in input order
 */
export function getParsedUrls(): ParsedUrl[] {
  const urls = useAuthStore.getState().proxySet ?? originalUrls;
  const output: ParsedUrl[] = [];

  for (const url of urls) {
    // Plain URL without a parameter prefix.
    if (!url.startsWith("|") && canParseUrl(url)) {
      output.push({ url, type: "proxy" });
      continue;
    }

    // The pipes must be escaped (an unescaped `|` is regex alternation), the
    // whole parameter run must sit inside the capture group, and the pattern
    // must not be global: `String.match` with the `g` flag returns the list
    // of whole matches instead of the capture groups.
    const match = /^\|([^|]+)\|(.*)$/.exec(url);
    if (!match) continue;

    const [, rawParams, rawUrl] = match;
    if (!rawParams || !rawUrl || !canParseUrl(rawUrl)) continue;

    const type = parseParams(rawParams).type ?? "proxy";
    if (!isParsedUrlType(type)) continue;

    output.push({ url: rawUrl, type });
  }

  return output;
}
/**
 * Collect the URLs of every parsed entry whose type is "proxy".
 *
 * @returns the proxy URLs, in the order getParsedUrls() produced them
 */
export function getProxyUrls() {
  const proxyUrls: string[] = [];
  for (const entry of getParsedUrls()) {
    if (entry.type === "proxy") proxyUrls.push(entry.url);
  }
  return proxyUrls;
}
/**
 * Collect the URLs of every parsed entry whose type is "api".
 *
 * @returns the provider API URLs, in the order getParsedUrls() produced them
 */
export function getProviderApiUrls() {
  const apiUrls: string[] = [];
  getParsedUrls().forEach((entry) => {
    if (entry.type !== "api") return;
    apiUrls.push(entry.url);
  });
  return apiUrls;
}