# **********************************************************************************************
# This code scans the ExoMol website and lists all available molecules and line lists.
# It writes two files, "Exomol_species.dat" and "Exomol_xsec_species.dat", which list the
# corresponding file names.

# Date: May 2019
# Author: Simon Grimm
#
# *********************************************************************************************

from bs4 import BeautifulSoup
import requests
import sys

#This function extracts the ranges covered by the .trans files of a line list.
#It returns three values:
# - the range of one file, or -1 when the files do not all cover the same range
# - the number of transition files
# - the number of digits used to write the range limits in the file names
def transitionRanges(url, timeout=60):
    """Summarise the ranges covered by the ``.trans`` files listed at *url*.

    The page is downloaded and every ``<li>`` element with the class
    "list-group-item link-list-group-item" is inspected. For each link whose
    href contains ".trans", the text between the last "__" and ".trans" is
    taken as a "<low>-<high>" range label.

    Parameters:
        url: address of the line-list page to scan.
        timeout: seconds passed to ``requests.get`` so that an unresponsive
            server cannot block the scan forever.

    Returns a tuple ``(s, n, dg)``:
        s:  width (high - low) shared by all files, or -1 when the widths
            are not all equal.
        n:  number of transition files.
        dg: number of characters of the lower limit in the last file name.
        When fewer than two transition files are found, ``(0, 1, 0)`` is
        returned. When a range label cannot be parsed, an error line is
        printed and ``(0, 0, 0)`` is returned.
    """
    page = requests.get(url, timeout=timeout).text
    soup = BeautifulSoup(page, "html.parser")
    items = soup.find_all('li', attrs={"class" : "list-group-item link-list-group-item"})

    # Collect the range label of every transition file
    transList = []
    for item in items:
        link = item.a
        href = link.get('href') if link is not None else None
        if not href:
            # list entry without a usable link: nothing to extract
            continue
        tail = href.split('__')[-1]        # right-most part after "__"
        label, found, _ = tail.partition('.trans')
        if found:
            transList.append(label)

    # Zero or one file: there is no set of ranges to compare
    if len(transList) <= 1:
        return (0, 1, 0)

    rangesList = []
    dg = 0
    for x in transList:
        parts = x.split('-')
        try:
            x0 = float(parts[0])
            x1 = float(parts[1])
        except (ValueError, IndexError):
            # label is not of the form "<number>-<number>"
            print("error", url, x)
            return (0, 0, 0)
        dg = len(parts[0])
        rangesList.append(x1 - x0)

    first = rangesList[0]
    s = first if all(r == first for r in rangesList) else -1
    n = len(rangesList)
    return (s, n, dg)


def main():
    """Walk the ExoMol data pages and write the species index files.

    The atoms and molecules overview pages are downloaded, then for every
    entry the isotopologue pages and their line-list links are visited.

    Output:
        Exomol_xsec_species.dat: one line per link containing "xsec-".
        Exomol_species.dat: one line per other link, including the three
            values returned by transitionRanges for that line list.

    Exits with status 100 when no entries are found on the overview pages.
    This check runs before the output files are opened, so a failed scan
    does not truncate files written by an earlier run.
    """
    timeout = 60    # seconds; keeps a stalled server from blocking the scan
    entryClass = "list-group-item link-list-group-item molecule_link"

    print("Scan Exomol webiste for file names")

    url = "http://exomol.com/data/atoms/"
    page = requests.get(url, timeout=timeout).text
    soup = BeautifulSoup(page, "html.parser")
    List = soup.find_all('a', attrs={"class" : entryClass})

    url = "http://exomol.com/data/molecules/"
    page = requests.get(url, timeout=timeout).text
    soup = BeautifulSoup(page, "html.parser")
    List += soup.find_all('a', attrs={"class" : entryClass})

    if not List:
        print("Error, no molecules found, maybe the Exomol homepage has changed")
        sys.exit(100)

    # Both files are line buffered and closed on every exit path
    with open("Exomol_species.dat", "w", buffering=1) as efile, \
            open("Exomol_xsec_species.dat", "w", buffering=1) as exfile:

        #Molecule
        for molecule in List:
            # NOTE(review): entries collected from the atoms page are also
            # split on 'data/molecules/' and fetched below the molecules
            # URL; confirm that the atom links really use that path.
            el = molecule.get('href').split('data/molecules/')[1]
            print(el)

            url1 = url + el + "/"
            page1 = requests.get(url1, timeout=timeout).text
            soup1 = BeautifulSoup(page1, "html.parser")

            List1 = soup1.find_all('a', attrs={"class" : "list-group-item link-list-group-item"})

            #Isotopologue
            for isotopologue in List1:
                el1 = isotopologue.get('href')

                print("    ", el1)

                url2 = url1 + el1 + "/"
                page2 = requests.get(url2, timeout=timeout).text
                soup2 = BeautifulSoup(page2, "html.parser")

                # three variants of the class string are queried
                List2 = soup2.find_all('a', attrs={"class" : "list-group-item link-list-group-item "})
                List2 += soup2.find_all('a', attrs={"class" : "list-group-item link-list-group-item"})
                List2 += soup2.find_all('a', attrs={"class" : "list-group-item link-list-group-item recommended"})

                #Line list
                for linelist in List2:
                    el2 = linelist.get('href')

                    #change HITEMP to HITEMP2010, but only in the prints;
                    #the URL is still built from the unmodified el2
                    el3 = el2.replace("HITEMP", "HITEMP2010")

                    if "xsec-" in el2:
                        name = el2.split("xsec-")[1]
                        p3 = el1 + "__" + name
                        print("%-16s %-24s %-32s %-32s" % (el, el1, el3, p3), file=exfile)
                    else:
                        p2 = el1 + "__" + el3
                        p3 = el + "/" + el1 + "/" + el2
                        print("        ", p2, p3)

                        url3 = url2 + el2
                        s, n, dg = transitionRanges(url3)
                        print(s, n, dg)
                        print("%-16s %-24s %-32s %-40s %8g %8g %8g" % (el, el1, p2, p3, s, n, dg), file=efile)

    print("Scan complete")

# Run the scan only when this file is executed as a script, not when imported.
if __name__ == '__main__':
    main()