add RedTube video/related scraping
This commit is contained in:
parent
401351289b
commit
dd8e9838a3
|
@ -2,6 +2,10 @@ import { getCookie } from "@/utils/cookies/read"
|
||||||
|
|
||||||
import { Cookies, RedTubeOrientations } from "@/meta/settings"
|
import { Cookies, RedTubeOrientations } from "@/meta/settings"
|
||||||
import { REDTUBE_BASE_SEARCH, REDTUBE_BASE_GAY_SEARCH, REDTUBE_BASE_URL_GAY, REDTUBE_BASE_URL, REDTUBE_BASE_URL_TRANS } from "@/constants/urls"
|
import { REDTUBE_BASE_SEARCH, REDTUBE_BASE_GAY_SEARCH, REDTUBE_BASE_URL_GAY, REDTUBE_BASE_URL, REDTUBE_BASE_URL_TRANS } from "@/constants/urls"
|
||||||
|
import { getHeadersWithCookie } from "../common/headers"
|
||||||
|
import { MindGeekVideoSrcElem, VideoSourceItem } from "@/meta/data"
|
||||||
|
|
||||||
|
import axios from "axios"
|
||||||
|
|
||||||
export const getRedTubeQueryUrl = async (query?: string): Promise<string> => {
|
export const getRedTubeQueryUrl = async (query?: string): Promise<string> => {
|
||||||
const orientation = await getCookie(Cookies.orientation)
|
const orientation = await getCookie(Cookies.orientation)
|
||||||
|
@ -35,4 +39,33 @@ export const getRedTubeResultsWrapperId = async (query?: string): Promise<string
|
||||||
}
|
}
|
||||||
|
|
||||||
return "#most_recent_videos li"
|
return "#most_recent_videos li"
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
 * Requests `url` and converts the response payload into a list of video sources.
 *
 * The request is sent with the headers produced by
 * `getHeadersWithCookie(REDTUBE_BASE_URL, sessionCookie)`.
 *
 * @param url - Endpoint to request the media list from.
 * @param sessionCookie - Cookie value forwarded to `getHeadersWithCookie`.
 * @returns One entry per element of the response array, each with `src` taken
 *   from `videoUrl`, `size` taken from `quality` and a fixed `type` of
 *   `'video/mp4'`. An empty array is returned when the payload is not an array
 *   or when the request fails; this function never rejects.
 */
export const getRedTubeMediaUrlList = async (url: string, sessionCookie: string): Promise<VideoSourceItem[]> => {
  const headersWithCookie = getHeadersWithCookie(REDTUBE_BASE_URL, sessionCookie)

  try {
    const response = await axios.get(url, headersWithCookie)
    const payload = response.data

    // Anything other than an array (missing body, error page, plain object)
    // cannot be mapped; treat it as "no sources" instead of letting `.map` throw.
    if (!Array.isArray(payload)) {
      return []
    }

    // The element shape is not validated at runtime, so the assertion from the
    // original implementation is kept rather than tightened here.
    return payload.map((elem: MindGeekVideoSrcElem) => ({
      src: elem?.videoUrl,
      type: 'video/mp4',
      size: elem?.quality
    })) as VideoSourceItem[]
  } catch (error) {
    // Best effort: callers receive an empty list, but the failure is reported
    // on the error channel together with the URL that was requested.
    console.error(`Failed to load RedTube media url list from ${url}`, error)
    return []
  }
}
|
}
|
|
@ -1,4 +1,15 @@
|
||||||
|
import { REDTUBE_BASE_URL } from "@/constants/urls";
|
||||||
import { FetchParams, GalleryData, VideoData, VideoSourceItem } from "@/meta/data";
|
import { FetchParams, GalleryData, VideoData, VideoSourceItem } from "@/meta/data";
|
||||||
|
import { getHeaders } from "../common/headers";
|
||||||
|
import { getDataFromRedis, storeDataIntoRedis } from "@/redis/client";
|
||||||
|
import { DEFAULT_REDTUBE_GALLERY_EXPIRY, DEFAULT_REDTUBE_VIDEO_EXPIRY, DEFAULT_RELATED_VIDEO_KEY_PATH } from "@/constants/redis";
|
||||||
|
|
||||||
|
import * as cheerio from "cheerio";
|
||||||
|
|
||||||
|
import axios, { AxiosError } from "axios";
|
||||||
|
import { createSessionCookie, findGetMediaUrlInTagblock } from "../common/mindgeek";
|
||||||
|
import { Platforms } from "@/meta/settings";
|
||||||
|
import { getRedTubeMediaUrlList } from "./url";
|
||||||
|
|
||||||
export const fetchRedTubeVideoData = async (videoId: string, params?: FetchParams): Promise<[VideoData, GalleryData[]]> => {
|
export const fetchRedTubeVideoData = async (videoId: string, params?: FetchParams): Promise<[VideoData, GalleryData[]]> => {
|
||||||
|
|
||||||
|
@ -9,5 +20,72 @@ export const fetchRedTubeVideoData = async (videoId: string, params?: FetchParam
|
||||||
|
|
||||||
let relatedData: GalleryData[] = [];
|
let relatedData: GalleryData[] = [];
|
||||||
|
|
||||||
|
let mediaUrl, sessionCookie, convertedData: VideoSourceItem[]
|
||||||
|
|
||||||
|
let reqHeaders = getHeaders(REDTUBE_BASE_URL)
|
||||||
|
|
||||||
|
const queryUrl = `${REDTUBE_BASE_URL}/${videoId.replace(/\//g, '')}`
|
||||||
|
|
||||||
|
const cachedVideoData = await getDataFromRedis(queryUrl)
|
||||||
|
const cachedRelatedData = await getDataFromRedis(queryUrl + DEFAULT_RELATED_VIDEO_KEY_PATH)
|
||||||
|
|
||||||
|
if (cachedVideoData) {
|
||||||
|
return [cachedVideoData as VideoData, cachedRelatedData as GalleryData[] ?? []]
|
||||||
|
}
|
||||||
|
|
||||||
|
await axios.get(queryUrl, reqHeaders)
|
||||||
|
|
||||||
|
.then(async response => {
|
||||||
|
|
||||||
|
sessionCookie = response?.headers["set-cookie"] ? createSessionCookie(response?.headers["set-cookie"]) : '';
|
||||||
|
|
||||||
|
const html = response.data;
|
||||||
|
|
||||||
|
const $ = cheerio.load(html);
|
||||||
|
|
||||||
|
const scriptTags = $("script");
|
||||||
|
|
||||||
|
scriptTags.map((idx, elem) => {
|
||||||
|
const getMediaUrl = findGetMediaUrlInTagblock($(elem).toString().replace(/\\/g, ''), 'media/mp4') ?? null
|
||||||
|
|
||||||
|
if (getMediaUrl) {
|
||||||
|
mediaUrl = `${REDTUBE_BASE_URL}${getMediaUrl}`
|
||||||
|
}
|
||||||
|
|
||||||
|
})
|
||||||
|
|
||||||
|
const wrapperId = "#related_videos_center li.tm_video_block"
|
||||||
|
|
||||||
|
const thumbs = $(wrapperId);
|
||||||
|
|
||||||
|
thumbs.map((key, thumb) => {
|
||||||
|
|
||||||
|
const videoUrl = $(thumb).find("a.tm_video_link").attr("href")?.split('/')[1];
|
||||||
|
const imgUrl = $(thumb).find("img.js_thumbImageTag").attr("data-src")
|
||||||
|
const text = $(thumb).find("a.tm_video_title").attr("title");
|
||||||
|
|
||||||
|
videoUrl && imgUrl && text && relatedData.push({
|
||||||
|
videoUrl,
|
||||||
|
imgUrl,
|
||||||
|
text,
|
||||||
|
platform: Platforms.redtube
|
||||||
|
})
|
||||||
|
})
|
||||||
|
|
||||||
|
}).catch((error: AxiosError) => {
|
||||||
|
// error handling goes here
|
||||||
|
});
|
||||||
|
|
||||||
|
if (sessionCookie && mediaUrl) {
|
||||||
|
convertedData = await getRedTubeMediaUrlList(mediaUrl, sessionCookie)
|
||||||
|
data.srcSet = convertedData.reverse()
|
||||||
|
|
||||||
|
await storeDataIntoRedis(queryUrl, data, DEFAULT_REDTUBE_VIDEO_EXPIRY);
|
||||||
|
}
|
||||||
|
|
||||||
|
if (relatedData.length > 0) {
|
||||||
|
await storeDataIntoRedis(queryUrl + DEFAULT_RELATED_VIDEO_KEY_PATH, relatedData, DEFAULT_REDTUBE_GALLERY_EXPIRY);
|
||||||
|
}
|
||||||
|
|
||||||
return [ data, relatedData ]
|
return [ data, relatedData ]
|
||||||
}
|
}
|
Loading…
Reference in New Issue