add YouPorn video scrape

This commit is contained in:
La macchina desiderante 2024-05-25 20:03:25 +02:00
parent b84ad5127a
commit 7c5ca07ab4
7 changed files with 112 additions and 10 deletions

View File

@ -4,8 +4,12 @@ const EX_DAILY = 60 * 60 * 24
export const DEFAULT_PORNHUB_GALLERY_EXPIRY = { EX: EX_HOURLY }; export const DEFAULT_PORNHUB_GALLERY_EXPIRY = { EX: EX_HOURLY };
export const DEFAULT_PORNHUB_VIDEO_EXPIRY = { EX: EX_MIN }; export const DEFAULT_PORNHUB_VIDEO_EXPIRY = { EX: EX_MIN };
export const DEFAULT_XVIDEOS_CONTENT_EXPIRY = { EX: EX_HOURLY }; export const DEFAULT_XVIDEOS_CONTENT_EXPIRY = { EX: EX_HOURLY };
export const DEFAULT_XNXX_CONTENT_EXPIRY = { EX: EX_HOURLY }; export const DEFAULT_XNXX_CONTENT_EXPIRY = { EX: EX_HOURLY };
export const DEFAULT_YOUPORN_GALLERY_EXPIRY = { EX: EX_HOURLY }; export const DEFAULT_YOUPORN_GALLERY_EXPIRY = { EX: EX_HOURLY };
export const DEFAULT_YOUPORN_VIDEO_EXPIRY = { EX: EX_HOURLY };
export const DEFAULT_RELATED_VIDEO_KEY_PATH = '/related/' export const DEFAULT_RELATED_VIDEO_KEY_PATH = '/related/'

View File

@ -26,4 +26,6 @@ export const PORNHUB_BASE_URL_GAY_SEARCH: string = 'https://www.pornhub.com/gay'
// YOUPORN // YOUPORN
export const YOUPORN_BASE_URL: string = 'https://www.youporn.com' export const YOUPORN_BASE_URL: string = 'https://www.youporn.com'
export const YOUPORN_BASE_URL_VIDEO: string = 'https://www.youporn.com/watch'
export const YOUPORN_BASE_SEARCH: string = 'https://www.youporn.com/search/?search-btn=&query=' export const YOUPORN_BASE_SEARCH: string = 'https://www.youporn.com/search/?search-btn=&query='

View File

@ -23,6 +23,11 @@ export interface VideoData {
srcSet?: VideoSourceItem[] srcSet?: VideoSourceItem[]
} }
/**
 * One element of the media-list payload that the PornHub and YouPorn URL
 * helpers iterate over when building their `VideoSourceItem` lists.
 */
export interface MindGeekVideoSrcElem {
  /** Location of the video file; the helpers use it to build the item's `src`. */
  videoUrl: string
  /** Quality label of this rendition; the helpers copy it into the item's `size`. */
  quality: string
}
export interface VideoAgent { export interface VideoAgent {
getGallery(params?: FetchParams): Promise<GalleryData[]> getGallery(params?: FetchParams): Promise<GalleryData[]>
getVideo(id: string, params?: FetchParams): Promise<[VideoData, GalleryData[]]> getVideo(id: string, params?: FetchParams): Promise<[VideoData, GalleryData[]]>

View File

@ -1,7 +1,7 @@
export const findGetMediaUrlInTagblock = ( export const findGetMediaUrlInTagblock = (
tagBlock: string): string | null => { tagBlock: string, key?: string): string | null => {
const getMediaIndex = tagBlock.indexOf('get_media'); const getMediaIndex = tagBlock.indexOf(key ?? 'get_media');
if (getMediaIndex === -1) { if (getMediaIndex === -1) {
return null return null

View File

@ -1,17 +1,12 @@
import { PORNHUB_BASE_URL, PORNHUB_BASE_URL_GAY, PORNHUB_BASE_URL_GAY_SEARCH } from "@/constants/urls" import { PORNHUB_BASE_URL, PORNHUB_BASE_URL_GAY, PORNHUB_BASE_URL_GAY_SEARCH } from "@/constants/urls"
import axios, { AxiosHeaders } from "axios" import axios, { AxiosHeaders } from "axios"
import { getHeadersWithCookie } from "../common/headers" import { getHeadersWithCookie } from "../common/headers"
import { GalleryData, VideoSourceItem } from "@/meta/data" import { GalleryData, MindGeekVideoSrcElem, VideoSourceItem } from "@/meta/data"
import { Cookies, Platforms, PornHubOrientations } from "@/meta/settings" import { Cookies, Platforms, PornHubOrientations } from "@/meta/settings"
import { getCookie } from "@/utils/cookies/read" import { getCookie } from "@/utils/cookies/read"
import { encodeUrl } from "@/utils/string" import { encodeUrl } from "@/utils/string"
import { DEFAULT_VIDEO_STREAM_ROUTE_PREFIX } from "@/constants/stream" import { DEFAULT_VIDEO_STREAM_ROUTE_PREFIX } from "@/constants/stream"
interface PornHubVideoSrcElem {
videoUrl: string
quality: string
}
export const getPornHubQueryUrl = async (query?: string): Promise<string> => { export const getPornHubQueryUrl = async (query?: string): Promise<string> => {
const orientation = await getCookie(Cookies.orientation) const orientation = await getCookie(Cookies.orientation)
@ -49,7 +44,7 @@ export const getPornHubMediaUrlList = async (url: string, sessionCookie: string)
if (response.data) { if (response.data) {
videos = await response.data.map((elem: PornHubVideoSrcElem) => ({ videos = await response.data.map((elem: MindGeekVideoSrcElem) => ({
src: `${DEFAULT_VIDEO_STREAM_ROUTE_PREFIX}/${Platforms.pornhub}/${encodeUrl(elem?.videoUrl)}`, src: `${DEFAULT_VIDEO_STREAM_ROUTE_PREFIX}/${Platforms.pornhub}/${encodeUrl(elem?.videoUrl)}`,
type: 'video/mp4', type: 'video/mp4',
size: elem?.quality size: elem?.quality

View File

@ -1,4 +1,7 @@
import { YOUPORN_BASE_SEARCH, YOUPORN_BASE_URL } from "@/constants/urls" import { YOUPORN_BASE_SEARCH, YOUPORN_BASE_URL } from "@/constants/urls"
import { getHeadersWithCookie } from "../common/headers"
import axios from "axios"
import { MindGeekVideoSrcElem, VideoSourceItem } from "@/meta/data"
export const getYouPornQueryUrl = async (query?: string): Promise<string> => { export const getYouPornQueryUrl = async (query?: string): Promise<string> => {
@ -8,3 +11,32 @@ export const getYouPornQueryUrl = async (query?: string): Promise<string> => {
return YOUPORN_BASE_URL return YOUPORN_BASE_URL
} }
/**
 * Fetches the list of playable sources for a YouPorn video.
 *
 * Requests `url` with headers built by `getHeadersWithCookie` from
 * `YOUPORN_BASE_URL` and the given session cookie, then maps each element of
 * the response payload to a `VideoSourceItem`: `videoUrl` becomes `src`,
 * `quality` becomes `size`, and `type` is always `'video/mp4'`.
 *
 * This is a best-effort lookup: it never rejects. Failures are logged and
 * reported to the caller as an empty list.
 *
 * @param url - Media endpoint to query.
 * @param sessionCookie - Cookie value forwarded to `getHeadersWithCookie`.
 * @returns The mapped sources, or an empty array when the request fails or the
 *          payload is not an array.
 */
export const getYouPornMediaUrlList = async (url: string, sessionCookie: string): Promise<VideoSourceItem[]> => {
  const headersWithCookie = getHeadersWithCookie(YOUPORN_BASE_URL, sessionCookie)

  try {
    const response = await axios.get(url, headersWithCookie)
    const payload: unknown = response.data

    // Anything that is not an array (empty body, error page, ...) carries no
    // sources; bail out explicitly instead of relying on `.map` throwing.
    if (!Array.isArray(payload)) {
      return []
    }

    return payload.map((elem: MindGeekVideoSrcElem) => ({
      src: elem?.videoUrl,
      type: 'video/mp4',
      size: elem?.quality
    })) as VideoSourceItem[]
  } catch (error) {
    // Keep the original contract: log the failure and hand back no sources.
    console.log(error)
    return []
  }
}

View File

@ -1,4 +1,14 @@
import { FetchParams, GalleryData, VideoData } from "@/meta/data"; import { YOUPORN_BASE_URL, YOUPORN_BASE_URL_VIDEO } from "@/constants/urls";
import { FetchParams, GalleryData, VideoData, VideoSourceItem } from "@/meta/data";
import { getHeaders } from "../common/headers";
import { getDataFromRedis, storeDataIntoRedis } from "@/redis/client";
import { DEFAULT_RELATED_VIDEO_KEY_PATH, DEFAULT_YOUPORN_VIDEO_EXPIRY } from "@/constants/redis";
import * as cheerio from "cheerio";
import axios, { AxiosError } from "axios";
import { createSessionCookie, findGetMediaUrlInTagblock, findGetRelatedUrlInTagblock } from "../common/mindgeek";
import { getYouPornMediaUrlList } from "./url";
export const fetchYouPornVideoData = async (videoId: string, params?: FetchParams): Promise<[VideoData, GalleryData[]]> => { export const fetchYouPornVideoData = async (videoId: string, params?: FetchParams): Promise<[VideoData, GalleryData[]]> => {
@ -9,5 +19,59 @@ export const fetchYouPornVideoData = async (videoId: string, params?: FetchParam
let relatedData: GalleryData[] = []; let relatedData: GalleryData[] = [];
let mediaUrl, relatedUrl, sessionCookie, convertedData: VideoSourceItem[]
let reqHeaders = getHeaders(YOUPORN_BASE_URL)
const queryUrl = `${YOUPORN_BASE_URL_VIDEO}/${videoId.replace(/\//g, '')}`
const cachedVideoData = await getDataFromRedis(queryUrl)
const cachedRelatedData = await getDataFromRedis(queryUrl + DEFAULT_RELATED_VIDEO_KEY_PATH)
if (cachedVideoData) {
return [cachedVideoData as VideoData, cachedRelatedData as GalleryData[] ?? []]
}
await axios.get(queryUrl, reqHeaders)
.then(async response => {
sessionCookie = response?.headers["set-cookie"] ? createSessionCookie(response?.headers["set-cookie"]) : '';
const html = response.data;
const $ = cheerio.load(html);
const scriptTags = $("script");
scriptTags.map((idx, elem) => {
const getMediaUrl = findGetMediaUrlInTagblock($(elem).toString().replace(/\\/g, ''), 'media/mp4') ?? null
if (getMediaUrl) {
mediaUrl = getMediaUrl
}
})
scriptTags.map((idx, elem) => {
const getRelatedUrl = findGetRelatedUrlInTagblock($(elem).toString()) ?? null
if (getRelatedUrl) {
relatedUrl = getRelatedUrl
}
})
}).catch((error: AxiosError) => {
// error handling goes here
});
if (sessionCookie && mediaUrl) {
convertedData = await getYouPornMediaUrlList(mediaUrl, sessionCookie)
data.srcSet = convertedData.reverse()
await storeDataIntoRedis(queryUrl, data, DEFAULT_YOUPORN_VIDEO_EXPIRY);
}
return [ data, relatedData ] return [ data, relatedData ]
} }