From 1ff49f0669cbaff4689024972e53f18c87a845cb Mon Sep 17 00:00:00 2001 From: wukko Date: Sat, 20 Apr 2024 20:33:34 +0600 Subject: [PATCH 1/4] instagram: use different endpoint and fallback to two other options --- src/modules/processing/services/instagram.js | 140 +++++++++++++++---- 1 file changed, 112 insertions(+), 28 deletions(-) diff --git a/src/modules/processing/services/instagram.js b/src/modules/processing/services/instagram.js index b5e2228d..d27a5693 100644 --- a/src/modules/processing/services/instagram.js +++ b/src/modules/processing/services/instagram.js @@ -60,39 +60,99 @@ async function request(url, cookie, method = 'GET', requestData) { return data.json(); } -async function getPost(id) { - let data; - try { - const cookie = getCookie('instagram'); - let dtsgId; - - if (cookie) { - dtsgId = await findDtsgId(cookie); +async function requestHTML(id, cookie = {}) { + const data = await fetch(`https://www.instagram.com/p/${id}/embed/captioned/`, { + headers: { + "Accept": "text/html,application/xhtml+xml,application/xml;q=0.9,image/avif,image/webp,image/apng,*/*;q=0.8,application/signed-exchange;v=b3;q=0.7", + "Accept-Language": "en-GB,en;q=0.9", + "Cache-Control": "max-age=0", + "Dnt": "1", + "Priority": "u=0, i", + "Sec-Ch-Ua": 'Chromium";v="124", "Google Chrome";v="124", "Not-A.Brand";v="99', + "Sec-Ch-Ua-Mobile": "?0", + "Sec-Ch-Ua-Platform": "macOS", + "Sec-Fetch-Dest": "document", + "Sec-Fetch-Mode": "navigate", + "Sec-Fetch-Site": "none", + "Sec-Fetch-User": "?1", + "Upgrade-Insecure-Requests": "1", + "User-Agent": "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/124.0.0.0 Safari/537.36", + ...cookie } + }).then(r => r.text()); - const url = new URL('https://www.instagram.com/api/graphql/'); + let embedData = JSON.parse(data?.match(/"init",\[\],\[(.*?)\]\],/)[1]); - const requestData = { - jazoest: '26406', - variables: JSON.stringify({ - shortcode: id, - __relay_internal__pv__PolarisShareMenurelayprovider: false - }), - doc_id: '7153618348081770' - }; - if (dtsgId) { - requestData.fb_dtsg = dtsgId; + if (!embedData || !embedData?.contextJSON) return false; + + embedData = JSON.parse(embedData.contextJSON); + + return embedData; +} +async function requestGQL(id, cookie) { + let dtsgId; + + if (cookie) { + dtsgId = await findDtsgId(cookie); + } + const url = new URL('https://www.instagram.com/api/graphql/'); + + const requestData = { + jazoest: '26406', + variables: JSON.stringify({ + shortcode: id, + __relay_internal__pv__PolarisShareMenurelayprovider: false + }), + doc_id: '7153618348081770' + }; + if (dtsgId) { + requestData.fb_dtsg = dtsgId; + } + + return (await request(url, cookie, 'POST', requestData)) + .data + ?.xdt_api__v1__media__shortcode__web_info + ?.items + ?.[0]; +} + +async function extractOldPost(data, id) { + const sidecar = data?.gql_data?.shortcode_media?.edge_sidecar_to_children; + if (sidecar) { + const picker = sidecar.edges.filter(e => e.node?.display_url) + .map(e => { + const type = e.node?.is_video ? "video" : "photo"; + const url = type === "video" ? e.node?.video_url : e.node?.display_url; + + return { + type, url, + /* thumbnails have `Cross-Origin-Resource-Policy` + ** set to `same-origin`, so we need to proxy them */ + thumb: createStream({ + service: "instagram", + type: "default", + u: e.node?.display_url, + filename: "image.jpg" + }) + } + }); + + if (picker.length) return { picker } + } else if (data?.gql_data?.shortcode_media?.video_url) { + return { + urls: data.shortcode_media.video_url, + filename: `instagram_${id}.mp4`, + audioFilename: `instagram_${id}_audio` } + } else if (data?.gql_data?.shortcode_media?.display_url) { + return { + urls: data.gql_data?.shortcode_media.display_url, + isPhoto: true + } + } +} - data = (await request(url, cookie, 'POST', requestData)) - .data - ?.xdt_api__v1__media__shortcode__web_info - ?.items - ?.[0]; - } catch {} - - if (!data) return { error: 'ErrorCouldntFetch' }; - +async function extractNewPost(data, id) { const carousel = data.carousel_media; if (carousel) { const picker = carousel.filter(e => e?.image_versions2) @@ -133,7 +193,31 @@ async function getPost(id) { isPhoto: true } } +} +async function getPost(id) { + let data, result, dataType = 'old'; + try { + const cookie = getCookie('instagram'); + + data = await requestHTML(id); + if (!data) data = await requestHTML(id, cookie); + + if (!data) { + dataType = 'new'; + data = await requestGQL(id, cookie); + } + } catch {} + + if (!data) return { error: 'ErrorCouldntFetch' }; + + if (dataType === 'new') { + result = extractNewPost(data, id) + } else { + result = extractOldPost(data, id) + } + + if (result) return result; return { error: 'ErrorEmptyDownload' } } From 2561cf168e125f5931d6dcf2d3ec6bdf3589a7a9 Mon Sep 17 00:00:00 2001 From: wukko Date: Sat, 20 Apr 2024 20:44:58 +0600 Subject: [PATCH 2/4] instagram: check if cookie exists before using it in second fallback --- src/modules/processing/services/instagram.js | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/modules/processing/services/instagram.js b/src/modules/processing/services/instagram.js index d27a5693..33c9f21c 100644 --- a/src/modules/processing/services/instagram.js +++ b/src/modules/processing/services/instagram.js @@ -201,7 +201,7 @@ async function getPost(id) { const cookie = getCookie('instagram'); data = await requestHTML(id); - if (!data) data = await requestHTML(id, cookie); + if (!data && cookie) data = await requestHTML(id, cookie); if (!data) { dataType = 'new'; From 018557cbcd8c452eaf02b2e1398100ef98e53062 Mon Sep 17 00:00:00 2001 From: wukko Date: Sat, 20 Apr 2024 20:47:33 +0600 Subject: [PATCH 3/4] instagram: remove async tag from non async functions --- src/modules/processing/services/instagram.js | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/modules/processing/services/instagram.js b/src/modules/processing/services/instagram.js index 33c9f21c..2f6900c7 100644 --- a/src/modules/processing/services/instagram.js +++ b/src/modules/processing/services/instagram.js @@ -116,7 +116,7 @@ async function requestGQL(id, cookie) { ?.[0]; } -async function extractOldPost(data, id) { +function extractOldPost(data, id) { const sidecar = data?.gql_data?.shortcode_media?.edge_sidecar_to_children; if (sidecar) { const picker = sidecar.edges.filter(e => e.node?.display_url) @@ -152,7 +152,7 @@ async function extractOldPost(data, id) { } } -async function extractNewPost(data, id) { +function extractNewPost(data, id) { const carousel = data.carousel_media; if (carousel) { const picker = carousel.filter(e => e?.image_versions2) From dd7c7dfa7603943c324cb89ff29813ac18279407 Mon Sep 17 00:00:00 2001 From: wukko Date: Sat, 20 Apr 2024 20:48:49 +0600 Subject: [PATCH 4/4] instagram: clean up --- src/modules/processing/services/instagram.js | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/modules/processing/services/instagram.js b/src/modules/processing/services/instagram.js index 2f6900c7..afcd6296 100644 --- a/src/modules/processing/services/instagram.js +++ b/src/modules/processing/services/instagram.js @@ -60,7 +60,7 @@ async function request(url, cookie, method = 'GET', requestData) { return data.json(); } -async function requestHTML(id, cookie = {}) { +async function requestHTML(id, cookie) { const data = await fetch(`https://www.instagram.com/p/${id}/embed/captioned/`, { headers: { "Accept": "text/html,application/xhtml+xml,application/xml;q=0.9,image/avif,image/webp,image/apng,*/*;q=0.8,application/signed-exchange;v=b3;q=0.7",