diff --git a/README.md b/README.md
index b5d2f02..bbb710e 100644
--- a/README.md
+++ b/README.md
@@ -70,6 +70,7 @@ These envvars turn off/on engines. By default all of them are enabled.
 | [mozhi.aryak.me](https://mozhi.aryak.me) | No | India | Airtel |
 | [translate.bus-hit.me](https://translate.bus-hit.me) | No | Canada | Oracle |
 | [nyc1.mz.ggtyler.dev](https://nyc1.mz.ggtyler.dev) | No | USA | Royale Hosting |
+| [translate.projectsegfau.lt](https://translate.projectsegfau.lt) | No | Germany / USA / India | Avoro / Racknerd / Airtel |
 
 ## Features
 - An all mode where the responses of all supported engines will be shown.
diff --git a/instances.json b/instances.json
new file mode 100644
index 0000000..7c42f99
--- /dev/null
+++ b/instances.json
@@ -0,0 +1,20 @@
+[
+    {
+        "country": "India",
+        "link": "https://mozhi.aryak.me",
+        "cloudflare": false,
+        "host": "Airtel"
+    },
+    {
+        "country": "Canada",
+        "link": "https://translate.bus-hit.me",
+        "cloudflare": false,
+        "host": "Oracle"
+    },
+    {
+        "country": "USA",
+        "link": "https://nyc1.mz.ggtyler.dev",
+        "cloudflare": false,
+        "host": "Royale Hosting"
+    }
+]
\ No newline at end of file
diff --git a/instances2json.py b/instances2json.py
new file mode 100644
index 0000000..3d85fc3
--- /dev/null
+++ b/instances2json.py
@@ -0,0 +1,66 @@
+#!/usr/bin/python3
+import requests
+import json
+from bs4 import BeautifulSoup
+
+print("Getting HTML")
+
+headers = {
+    'User-Agent': 'Mozilla/5.0 MozhiInstanceFetcher/1.0 (+codeberg.org/aryak/mozhi)'
+}
+
+# Get the HTML from the page
+r = requests.get('https://codeberg.org/aryak/mozhi', headers=headers)
+
+# Parse the HTML
+soup = BeautifulSoup(r.text, 'html.parser')
+
+print("Scraping started")
+
+# Get tables
+tables = soup.find_all('table')
+
+# Get table with header 'Master Branch'
+table = tables[1]
+
+# Get all rows and columns. Skip the first row because it's the header
+rows = table.find_all('tr')[1:]
+
+theJson = []
+
+for row in rows:
+
+    link = row.find_all('td')[0].find('a')['href']
+    cloudflare = row.find_all('td')[1].text
+    country = row.find_all('td')[2].text
+    host = row.find_all('td')[3].text
+
+    print("Scraping " + row.find_all('td')[0].find('a')['href'] + ' instance...')
+    if cloudflare == 'Yes':
+        isCloudflare = True
+    else:
+        isCloudflare = False
+
+    try:
+        r = requests.get(link + '/', headers=headers)
+        if r.status_code != 200:
+            print("Error while fetching " + link + '/. We got a ' + str(r.status_code) + ' status code. Skipping...')
+            continue
+    except:
+        print("Error while fetching " + link + '/. Skipping...')
+        continue
+
+    theJson.append({
+        'country': country,
+        'link': link,
+        'cloudflare': isCloudflare,
+        'host': host,
+    })
+
+
+print("Scraping finished. Saving JSON...")
+
+# save JSON
+with open('instances.json', 'w') as outfile:
+    json.dump(theJson, outfile, indent=4)
+    print("File saved as instances.json")
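
A minimal sketch of how a client might consume the generated `instances.json`, assuming only the schema that `instances2json.py` writes above (a list of objects with `country`, `link`, `cloudflare`, and `host` keys); the filtering and selection logic here is illustrative, not part of the patch:

```python
#!/usr/bin/python3
# Sketch: pick a usable Mozhi instance from the instances.json produced above.
# Assumes the schema written by instances2json.py: a list of objects with
# "country", "link", "cloudflare" and "host" keys.
import json
import random

with open('instances.json') as f:
    instances = json.load(f)

# Prefer instances that are not behind Cloudflare; fall back to the full list.
candidates = [i for i in instances if not i['cloudflare']] or instances

instance = random.choice(candidates)
print("Selected " + instance['link'] + " (" + instance['country'] + ", hosted by " + instance['host'] + ")")
```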