0
Fork 0
mirror of https://codeberg.org/SafeTwitch/safetwitch.git synced 2024-12-22 05:12:57 -05:00

Clean code

This commit is contained in:
dragongoose 2023-03-07 23:31:09 -05:00
parent 60bae4f617
commit 088df551b0
4 changed files with 232 additions and 208 deletions

View file

@ -1,7 +1,7 @@
<script lang="ts" >
import { ref, onMounted } from 'vue'
import { useRoute } from "vue-router";
import type { StreamerData } from '../../../server/routes/profile/profileRoute'
import type { StreamerData } from '../../../server/types/scraping/Streamer'
import { VideoPlayer } from '@videojs-player/vue'
import 'video.js/dist/video-js.css'

View file

@ -1,219 +1,16 @@
import { Router } from 'express'
import puppeteer from 'puppeteer-extra'
import { Browser, Page } from 'puppeteer'
import { PuppeteerExtraPluginAdblocker } from 'puppeteer-extra-plugin-adblocker'
import { LooseObject } from '../../types/looseTypes'
import { Streamlink } from '@dragongoose/streamlink'
// Register the adblocker plugin on the imported puppeteer-extra object when this module loads,
// with its `blockTrackersAndAnnoyances` option switched on.
puppeteer.use(new PuppeteerExtraPluginAdblocker({
blockTrackersAndAnnoyances: true
}))
import { TwitchScraper } from '../../util/scraping/extractors'
const profileRouter = Router()
/** One link scraped from a channel's about panel. */
export interface Socials {
  /** Platform name derived from the link's host, or `null` when the host is not a recognised platform. */
  type: string | null
  /** Visible text of the link. */
  text: string
  /** Target URL of the link. */
  link: string
}

/** Details of a live stream scraped from the channel page. */
export interface StreamData {
  tags: string[]
  title: string
  topic: string
  /** Moment the stream started, as epoch milliseconds. */
  startedAt: number
  /** Quality names reported by Streamlink for the stream. */
  qualities: string[]
}

/** Everything the profile route sends back for one channel. */
export interface StreamerData {
  username: string
  followers: number
  /** Follower count exactly as displayed on the page (for example an abbreviated form). */
  followersAbbv: string
  isLive: boolean
  about: string
  /** The scraper fills this with structured link objects, not plain strings. */
  socials?: Socials[]
  /** Value of the avatar image's `src` attribute. */
  pfp: string
  stream?: StreamData
}
/**
 * Converts a count such as `"1.5K"`, `"2M"` or `"523"` into a plain number.
 *
 * @param num - Count text, optionally ending in `k`, `m` or `b` (any case).
 * @returns The numeric value, or `null` when the text holds no usable number.
 */
const abbreviatedNumberToNumber = (num: string): number | null => {
  const multipliers = new Map<string, number>([
    ['k', 1000],
    ['m', 1000000],
    ['b', 1000000000],
  ])

  const text = num.trim()
  const multiplier = multipliers.get(text.slice(-1).toLowerCase())
  // Without a known suffix the whole text is the number (counts below one thousand).
  const digits = multiplier === undefined ? text : text.slice(0, -1).trim()
  // Number('') is 0, so an empty numeric part has to be rejected explicitly.
  if (digits === '') return null

  const value = Number(digits) * (multiplier ?? 1)
  return Number.isFinite(value) ? value : null
}
/**
 * Launches a headless browser, hands it to `fn`, and closes it once `fn` has settled,
 * whether it resolved or threw.
 *
 * @param fn - Work to run against the launched browser.
 * @returns Whatever `fn` resolves to.
 * @see https://advancedweb.hu/how-to-speed-up-puppeteer-scraping-with-parallelization/
 */
const withBrowser = async <T>(fn: (browser: Browser) => T | Promise<T>): Promise<T> => {
  const browser = await puppeteer.launch({
    headless: true,
    args: ['--no-sandbox']
  })
  try {
    return await fn(browser)
  } finally {
    // Runs on success and on failure, so callers never need to close the browser themselves.
    await browser.close()
  }
}
/**
 * Builds a runner bound to `browser`. The runner opens a new page, passes it to `fn`,
 * and closes the page once `fn` has settled, whether it resolved or threw.
 *
 * @param browser - Browser in which the page is opened.
 * @returns A function that takes the page callback and resolves to the callback's result.
 */
const withPage = (browser: Browser) => async <T>(fn: (page: Page) => T | Promise<T>): Promise<T> => {
  const page = await browser.newPage()
  // Debugging aid, kept disabled:
  //await page.tracing.start({ path: '../profile.json', screenshots: true });
  try {
    return await fn(page)
  } finally {
    //await page.tracing.stop();
    await page.close()
  }
}
// Live flag read by the scraping helpers below; the route handler assigns it before each scrape.
// NOTE(review): this is one module-level variable, so overlapping requests would all read and
// write the same flag — confirm that is acceptable.
let isLive: boolean
/**
 * Scrapes the live-stream details from an already loaded channel page.
 *
 * Reads the module-level `isLive` flag and does nothing when it is false.
 *
 * @param page - Page that has already navigated to the channel.
 * @returns The stream details with an empty `qualities` list (filled in by the caller),
 *          or `null` when the channel is offline or the uptime counter cannot be read.
 */
const getStreamData = async (page: Page): Promise<StreamData | null> => {
  if (!isLive) return null

  // Get stream tags
  const tags: string[] = await page.$$eval('.eUxEWt * span', elements => elements.map(el => el.innerHTML))

  // Get stream title
  const title: string = await page.$eval('h2.CoreText-sc-1txzju1-0', element => element.innerText)

  // Get topic
  const topic = await page.$eval('.hfMGmo', element => element.textContent)

  // Get the uptime counter, formatted as HH:MM:SS
  const liveTime = await page.$eval('.live-time', element => element.textContent)
  if (!liveTime) return null

  // Read the parts from the right so a shorter MM:SS counter is still understood.
  const [seconds = 0, minutes = 0, hours = 0] = liveTime.trim().split(':').map(Number).reverse()
  const uptimeMs = ((hours * 60 + minutes) * 60 + seconds) * 1000
  if (!Number.isFinite(uptimeMs)) return null

  return {
    tags,
    title,
    topic: topic ?? '',
    // Start time = now minus uptime, done on epoch milliseconds so local-time
    // shifts (such as DST changes) cannot skew the result.
    startedAt: Date.now() - uptimeMs,
    qualities: [],
  }
}
/**
 * Reads the follower count, about text, social links and avatar from a channel page.
 *
 * Uses the module-level `isLive` flag: when it is false the about tab is clicked
 * before the panel is read.
 *
 * @param page - Page that has already navigated to the channel.
 * @returns An object holding `followersAbbv`, `followers`, `about`, `socials` and `pfp`,
 *          asserted as `StreamerData`; the remaining fields are not set here.
 */
const getAboutData = async (page: Page) => {
  if (!isLive) {
    // Get data from about page
    await page.click('li.InjectLayout-sc-1i43xsx-0:nth-child(2) > a:nth-child(1) > div:nth-child(1) > div:nth-child(1) > p:nth-child(1)')
  }
  await page.waitForSelector('.ccXeNc')

  const followerText = await page.$eval('.kuAEke', node => node.innerHTML)
  const followerCount = abbreviatedNumberToNumber(followerText)

  const aboutHtml = await page.$eval('.kLFSJC', node => node.innerHTML)

  // This callback is handed to page.$$eval, so it only uses values defined inside it.
  const links: Socials[] = await page.$$eval('.ccXeNc * a', anchors => anchors.map((anchor) => {
    const recognised = ['instagram', 'youtube', 'discord', 'tiktok','twitter']

    // First label of the host name (after an optional "www."), or null when the URL has none.
    const found = anchor.href.match(/:\/\/(www[0-9]?\.)?(.[^/:]+)/i)
    let host: string | null = null
    if (found != null && found.length > 2 && typeof found[2] === 'string' && found[2].length > 0) {
      host = found[2].split(".")[0]
    }

    const candidate = host || anchor.href || ''
    return {
      type: recognised.includes(candidate) ? candidate : null,
      link: anchor.href,
      text: anchor.innerText
    }
  }))

  const avatarSrc = await page.$eval(
    'figure.ScAvatar-sc-144b42z-0:nth-child(2) > img:nth-child(1)',
    node => node.getAttribute('src')
  )

  const collected: LooseObject = {
    followersAbbv: followerText,
    followers: followerCount,
    about: aboutHtml,
    socials: links,
    pfp: avatarSrc
  }
  return collected as StreamerData
}
/**
 * Opens the channel page in a fresh browser and gathers its stream and about data.
 *
 * @param username - Channel name; it is URL-encoded before being placed in the address.
 * @returns The about data plus `stream` and `username`; `isLive` is set to true only
 *          when stream data was actually scraped.
 */
const getStreamerData = async (username: string): Promise<StreamerData> => {
  let recoveredData: LooseObject = {}

  await withBrowser(async (browser: Browser) => {
    const result = await withPage(browser)(async (page: Page) => {
      await page.goto(`https://twitch.tv/${encodeURIComponent(username)}`)
      return Promise.all([getStreamData(page), getAboutData(page)])
    })

    recoveredData = result[1]
    recoveredData.stream = result[0]
    // `!= null` also rules out `undefined`, which must not count as a live stream.
    if (result[0] != null) recoveredData.isLive = true
    // The browser is not closed here: withBrowser already closes it in its `finally`.
  })

  recoveredData.username = username
  return recoveredData as StreamerData
}
const scraper = new TwitchScraper()
// GET /users/:username — responds with the scraped profile of the named channel.
profileRouter.get('/users/:username', async (req, res, next) => {
  const username = req.params.username
  try {
    // The scraper resolves the live flag and the stream qualities itself,
    // so its result is sent as is, and exactly once.
    const streamerData = await scraper.getStreamerData(username)
    res.send(streamerData)
  } catch (err) {
    // Pass failures to the error-handling middleware instead of sending a second response.
    next(err)
  }
})
export default profileRouter

View file

@ -0,0 +1,24 @@
/** One link scraped from a channel's about panel. */
export interface Socials {
  /** Platform name derived from the link's host, or `null` when the host is not a recognised platform. */
  type: string | null
  /** Visible text of the link. */
  text: string
  /** Target URL of the link. */
  link: string
}

/** Details of a live stream scraped from the channel page. */
export interface StreamData {
  tags: string[]
  title: string
  topic: string
  /** Moment the stream started, as epoch milliseconds. */
  startedAt: number
  /** Quality names reported by Streamlink for the stream. */
  qualities: string[]
}

/** Everything the scraper returns for one channel. */
export interface StreamerData {
  username: string
  followers: number
  /** Follower count exactly as displayed on the page (for example an abbreviated form). */
  followersAbbv: string
  isLive: boolean
  about: string
  /** The scraper fills this with structured link objects, not plain strings. */
  socials?: Socials[]
  /** Value of the avatar image's `src` attribute. */
  pfp: string
  stream?: StreamData
}

View file

@ -0,0 +1,203 @@
import puppeteer from 'puppeteer-extra'
import { Browser, Page } from 'puppeteer'
import { PuppeteerExtraPluginAdblocker } from 'puppeteer-extra-plugin-adblocker'
import { LooseObject } from '../../types/looseTypes'
import { StreamData, StreamerData, Socials } from '../../types/scraping/Streamer'
import { Streamlink } from '@dragongoose/streamlink'
/**
 * Scrapes a Twitch channel page with a headless browser and combines the result with
 * the live status and stream qualities obtained through Streamlink.
 */
export class TwitchScraper {
  /** Streamer data keyed by string; none of the methods in this class read or write it. */
  public cache: Map<string, StreamerData> = new Map()

  /** Set once the adblocker plugin has been handed to puppeteer-extra. */
  private static adblockerRegistered = false

  constructor() {
    // Register the plugin a single time, so creating several scrapers does not
    // register it repeatedly on the shared `puppeteer` import.
    if (!TwitchScraper.adblockerRegistered) {
      puppeteer.use(new PuppeteerExtraPluginAdblocker({
        blockTrackersAndAnnoyances: true
      }))
      TwitchScraper.adblockerRegistered = true
    }
  }

  /** Builds the channel address, URL-encoding the name so it cannot alter the path. */
  private channelUrl = (username: string): string =>
    `https://twitch.tv/${encodeURIComponent(username)}`

  /**
   * Converts a count such as `"1.5K"`, `"2M"` or `"523"` into a plain number.
   *
   * @param num - Count text, optionally ending in `k`, `m` or `b` (any case).
   * @returns The numeric value, or `null` when the text holds no usable number.
   */
  private abbreviatedNumberToNumber = (num: string): number | null => {
    const multipliers = new Map<string, number>([
      ['k', 1000],
      ['m', 1000000],
      ['b', 1000000000],
    ])

    const text = num.trim()
    const multiplier = multipliers.get(text.slice(-1).toLowerCase())
    // Without a known suffix the whole text is the number (counts below one thousand).
    const digits = multiplier === undefined ? text : text.slice(0, -1).trim()
    // Number('') is 0, so an empty numeric part has to be rejected explicitly.
    if (digits === '') return null

    const value = Number(digits) * (multiplier ?? 1)
    return Number.isFinite(value) ? value : null
  }

  /**
   * Launches a headless browser, hands it to `fn`, and closes it once `fn` has settled,
   * whether it resolved or threw.
   *
   * @see https://advancedweb.hu/how-to-speed-up-puppeteer-scraping-with-parallelization/
   */
  private withBrowser = async <T>(fn: (browser: Browser) => T | Promise<T>): Promise<T> => {
    const browser = await puppeteer.launch({
      headless: true,
      args: ['--no-sandbox']
    })
    try {
      return await fn(browser)
    } finally {
      await browser.close()
    }
  }

  /**
   * Builds a runner bound to `browser`. The runner opens a new page, passes it to `fn`,
   * and closes the page once `fn` has settled.
   */
  private withPage = (browser: Browser) => async <T>(fn: (page: Page) => T | Promise<T>): Promise<T> => {
    const page = await browser.newPage()
    // Debugging aid, kept disabled:
    //await page.tracing.start({ path: '../profile.json', screenshots: true });
    try {
      return await fn(page)
    } finally {
      //await page.tracing.stop();
      await page.close()
    }
  }

  /**
   * Scrapes the live-stream details from an already loaded channel page.
   *
   * @param page - Page that has already navigated to the channel.
   * @param isLive - Whether the channel is live; nothing is scraped when false.
   * @returns The stream details with an empty `qualities` list (filled in by
   *          `getStreamerData`), or `null` when the channel is offline or the
   *          uptime counter cannot be read.
   */
  private getStreamData = async (page: Page, isLive: boolean): Promise<StreamData | null> => {
    if (!isLive) return null

    // Get stream tags
    const tags: string[] = await page.$$eval('.eUxEWt * span', elements => elements.map(el => el.innerHTML))

    // Get stream title
    const title: string = await page.$eval('h2.CoreText-sc-1txzju1-0', element => element.innerText)

    // Get topic
    const topic = await page.$eval('.hfMGmo', element => element.textContent)

    // Get the uptime counter, formatted as HH:MM:SS
    const liveTime = await page.$eval('.live-time', element => element.textContent)
    if (!liveTime) return null

    // Read the parts from the right so a shorter MM:SS counter is still understood.
    const [seconds = 0, minutes = 0, hours = 0] = liveTime.trim().split(':').map(Number).reverse()
    const uptimeMs = ((hours * 60 + minutes) * 60 + seconds) * 1000
    if (!Number.isFinite(uptimeMs)) return null

    return {
      tags,
      title,
      topic: topic ?? '',
      // Start time = now minus uptime, done on epoch milliseconds so local-time
      // shifts (such as DST changes) cannot skew the result.
      startedAt: Date.now() - uptimeMs,
      qualities: [],
    }
  }

  /**
   * Reads the follower count, about text, social links and avatar from a channel page.
   *
   * @param page - Page that has already navigated to the channel.
   * @param isLive - When false, the about tab is clicked before the panel is read.
   * @returns An object holding `followersAbbv`, `followers`, `about`, `socials` and `pfp`,
   *          asserted as `StreamerData`; the remaining fields are not set here.
   */
  private getAboutData = async (page: Page, isLive: boolean) => {
    const aboutData: LooseObject = {}

    if (!isLive) {
      // Get data from about page
      const aboutPageButtonSelector = 'li.InjectLayout-sc-1i43xsx-0:nth-child(2) > a:nth-child(1) > div:nth-child(1) > div:nth-child(1) > p:nth-child(1)'
      await page.click(aboutPageButtonSelector)
    }

    const followersSelector = '.kuAEke'
    await page.waitForSelector(followersSelector)
    const followers = await page.$eval(followersSelector, element => element.innerHTML)
    aboutData.followersAbbv = followers
    aboutData.followers = this.abbreviatedNumberToNumber(followers)

    aboutData.about = await page.$eval('.kLFSJC', element => element.innerHTML)

    // This callback is handed to page.$$eval, so it only uses values defined inside it.
    const socials: Socials[] = await page.$$eval('.ccXeNc * a', elements => elements.map((el) => {
      // First label of the host name (after an optional "www."), or null when the URL has none.
      const getHostName = (url: string) => {
        const match = url.match(/:\/\/(www[0-9]?\.)?(.[^/:]+)/i)
        return match && match[2] ? match[2].split('.')[0] : null
      }

      const validHosts = ['instagram', 'youtube', 'discord', 'tiktok', 'twitter']
      const socialHost = getHostName(el.href) || el.href || ''
      return {
        type: validHosts.includes(socialHost) ? socialHost : null,
        link: el.href,
        text: el.innerText
      }
    }))
    aboutData.socials = socials

    const profilePictureSelector = 'figure.ScAvatar-sc-144b42z-0:nth-child(2) > img:nth-child(1)'
    aboutData.pfp = await page.$eval(profilePictureSelector, element => element.getAttribute('src'))

    return aboutData as StreamerData
  }

  /**
   * Collects everything known about a channel: about data, live status and, when live,
   * stream details including the available qualities.
   *
   * @param username - Channel name.
   * @returns The combined streamer data; `stream` is `null` when no stream data was scraped.
   * @throws Error when the channel page does not load with a successful status.
   */
  public getStreamerData = async (username: string): Promise<StreamerData> => {
    const isLive = await this.isLive(username)
    const url = this.channelUrl(username)

    // withBrowser and withPage close the page and the browser themselves, also on failure.
    const [stream, about] = await this.withBrowser((browser: Browser) =>
      this.withPage(browser)(async (page: Page) => {
        const response = await page.goto(url)
        if (!response?.ok()) {
          // Fail with a clear message instead of scraping an error page.
          const status = response ? response.status() : 'no response'
          throw new Error(`Failed to load ${url} (status: ${status})`)
        }
        return Promise.all([this.getStreamData(page, isLive), this.getAboutData(page, isLive)])
      })
    )

    const recoveredData: LooseObject = about
    recoveredData.stream = stream

    // add final information
    if (stream && isLive) {
      stream.qualities = await this.getQualities(username)
    }
    recoveredData.isLive = isLive
    recoveredData.username = username

    return recoveredData as StreamerData
  }

  /** Asks Streamlink whether the channel is currently live. */
  public isLive = async (username: string) => {
    const streamlink = new Streamlink(this.channelUrl(username), {})
    return await streamlink.isLive()
  }

  /** Asks Streamlink which qualities the channel's stream offers. */
  public getQualities = async (username: string) => {
    const streamlink = new Streamlink(this.channelUrl(username), {})
    return await streamlink.getQualities()
  }
}