from lxml.html import fromstring
import httpx
import os, asyncio, time, datetime, sys, hashlib

import pytz
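
# Locate an archived copy of the "深夜のションボリたぬきスレ" thread on FTBucket, download
# its images, thumbnails and attached archives, and rebuild a local index.html from the
# origin.html template.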

ROOTSRC = "https://may2.ftbucket.info/may/cont/"


def get_src(index=-1,day=None):
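    """Search FTBucket for the thread and return the base URL of one archived copy.

    index selects among the 'DL' download links (-1 = last entry). If day is given and
    its zero-padded day of month ('%d') appears in the listing, that entry is used instead.
    """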
    search_site = "https://may2.ftbucket.info/may/index.php?mode=c&f=0&w=0&s=%E6%B7%B1%E5%A4%9C%E3%81%AE%E3%82%B7%E3%83%A7%E3%83%B3%E3%83%9C%E3%83%AA%E3%81%9F%E3%81%AC%E3%81%8D%E3%82%B9%E3%83%AC"
    search_resp = httpx.get(search_site)
    assert search_resp.status_code == 200
    search_text = fromstring(search_resp.text)
    src_list = search_text.xpath("//td[contains(div/a/text(),'DL')]/div[not(a)]/text()")
    src_list = [ i.strip().split()[0] for i in src_list ]

    tindex = index
    if day and src_list:
        mark = datetime.datetime.strftime(day,'%d')
        if mark in src_list:
            tindex = src_list.index(mark)

    raw_src = search_text.xpath("//a[contains(text(),'DL')]/@href")[tindex]
    # Strip the exact download.php prefix and the ".htm" suffix (removeprefix/removesuffix,
    # Python 3.9+), then flatten the URL-encoded path into a directory-style name.
    src = raw_src.removeprefix("download.php?rooturl=http%3A%2F%2F").removesuffix(".htm").replace("%2F","_")
    return ROOTSRC + src


def add_ref(src,xpath):
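    """Join the text nodes matched by xpath; append a trailing newline only if non-empty."""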
    res = "\n".join(src.xpath(xpath))
    if res:
        return res + "\n"
    return res


def get_urls_data(src):
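    """Parse the archived thread page and collect the OP image, per-post image URLs
    with their reply text, attached archive (zip) links, and thumbnail URLs."""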
    resp = httpx.get(src+"/index.htm")
    assert resp.status_code == 200
    resp.encoding = "shift_jis"

    html = fromstring(resp.text)

    # The OP image: the first linked image inside the thread header span.
    header = html.xpath("//span[@class='thre']/a[img]/@href")[0]

    # Reply cells whose attachment link starts with 'img' and is not an mp4/webm video.
    imgs = html.xpath("//table//td[@class='rtd' and a[starts-with(@href,'img') and not(substring(@href, string-length(@href) - string-length('mp4') +1) = 'mp4') and not(substring(@href, string-length(@href) - string-length('webm') +1) = 'webm') ]]")
    # XPath 1.0 has no ends-with(); "substring(@href, string-length(@href) - string-length(s) + 1) = s" is the equivalent test used above.

    # Posts that attach archives under other/ (uploaded zips and similar files).
    zips = html.xpath("//table//blockquote[starts-with(a/@href,'other')]")

    # Skip posts quarantined by a deletion request; apply the same filter to the
    # thumbnails so both lists stay index-aligned for dump_html.
    quarantined = "削除依頼によって隔離されました"
    imgs_dict = [ {"reply":add_ref(i,"./blockquote/font/text()") + "\n".join(i.xpath("./blockquote/text()")),"url":i.xpath("./a[not(img) and starts-with(@href,'img')]/@href")} for i in imgs if quarantined not in "\n".join(i.xpath("./blockquote/text()")) ]
    zips_dict = [ {"reply":add_ref(i,"./font/text()") + "\n".join(i.xpath("./text()")), "url":i.xpath("./a/@href")} for i in zips ]
    thumb_dict = [ i.xpath("./a[img and starts-with(@href,'img')]/img/@src")[0] for i in imgs if quarantined not in "\n".join(i.xpath("./blockquote/text()")) ]

    return {"header":header,"imgs":imgs_dict,"zips":zips_dict,"thumb":thumb_dict}


def mkdir(src):
    if not os.path.exists(src):
        os.mkdir(src)



async def single_fetch(client,url,path,root):
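    """Download one file and write it to root/path; failures are logged and skipped."""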
    try:
        resp = await client.get(url, timeout=3600)
    except httpx.TimeoutException:
        print(f"GET {url} timed out, skipping it.")
        return
    except Exception as e:
        print(f"GET {url} failed ({e!r}), skipping it.")
        return
    if resp.status_code != 200:
        print(f"Got status code {resp.status_code} for {url}, skipping it.")
    else:
        with open(os.path.join(root,path),"wb") as f:
            f.write(resp.content)

async def fetch(data,src,root):
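    """Concurrently download the OP image, post images, attached archives and
    thumbnails into the img/, other/ and thumb/ subdirectories of root."""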
    header = data["header"]
    imgs = data["imgs"]
    zips = data["zips"]

    imgs_list = [header]
    for i in imgs:
        imgs_list.extend(i["url"])

    zip_list = []

    for j in zips:
        zip_list.extend(j["url"])

    mkdir(os.path.join(root,"img"))
    mkdir(os.path.join(root,"other"))
    mkdir(os.path.join(root,"thumb"))

    async with httpx.AsyncClient() as r:
        task = []
        zip_task = []
        thumb_task = []
        print("start downloading...")

        for img in imgs_list:
            req = single_fetch(r,src+"/"+img,img,root)
            single_task = asyncio.create_task(req)
            task.append(single_task)
        for single_zip in zip_list:
            req = single_fetch(r,src+"/"+single_zip,single_zip,root)
            single_task = asyncio.create_task(req)
            zip_task.append(single_task)
        for single_thumb in data["thumb"]:
            req = single_fetch(r,src+"/"+single_thumb,single_thumb,root)
            single_task = asyncio.create_task(req)
            thumb_task.append(single_task)

        await asyncio.gather(*task)
        print("Image download finished")
        await asyncio.gather(*zip_task)
        print("Zip download finished")
        await asyncio.gather(*thumb_task)
        print("Thumbnail download finished")


def parse_reply(reply:str):
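    """Render a plain-text reply as HTML: quoted lines (">") get the board's green
    quote color, and lines are joined with <br>."""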
    raw = reply.splitlines()
    res = []
    for i in raw:
        if i.startswith(">"):
            res.append(f'<font color="#789922">{i}</font>')
        else:
            res.append(i)
    return "<br>".join(res)

def dump_html(data,root):
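    """Rebuild index.html from the origin.html template: fill in the header image,
    one block per image post (with any zip links sharing its reply text), blocks for
    zip-only posts, and the flat zip link list. The previous index.html is kept as
    old.html whenever its content differs from the new one."""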
    block = '<table border=0><tr><td class=rts>...</td><td class=rtd><span class="rsc">{block_num}</span><br> &nbsp; &nbsp; {block_text}</td></tr></table>'
    block_link = '<a href="{link}">{name}</a><br><a href="{link}"><img src="{thumb}" border=0 align=left width=250 hspace=20 loading="lazy"></a>'
    block_reply = '<blockquote style="margin-left:290px;">{zips}{reply}</blockquote>'
    block_reply_noimg = '<blockquote>{zips}{reply}</blockquote>'

    
    header_block = block_link.format_map({"link":data["header"],"name":data["header"].removeprefix("img/"),"thumb":data["header"]}) + "\n"

    zip_blocks = ""
    zip_block = '<a target=_blank href="{link}">{name}</a><br>'
    
    img_reply = [ i["reply"] for i in data["imgs"] ]
    no_img_block = [ i for i in data["zips"] if i["reply"] not in img_reply]

    for i in data["zips"]:
        for j in i["url"]:
            zip_block_dict = {"link":j,"name":j.removeprefix("other/")}
            zip_blocks += "\n" + zip_block.format_map(zip_block_dict)

    img_blocks = ""
    for i,v in enumerate(data["imgs"]):
        block_link_dict = {"link":v["url"][0],"thumb":data["thumb"][i],"name":v["url"][0].removeprefix("img/")}


        inner_zip_block = ""
        block_reply_dict = {"reply":parse_reply(v["reply"]),"zips":inner_zip_block}
        
        if "削除依頼によって隔離されました" in block_reply_dict["reply"]:
            continue

        for j in data["zips"]:
            if j["reply"] == v["reply"]:
                zip_block_dict = {"link":j["url"][0],"name":j["url"][0].removeprefix("other/")}
                inner_zip_block += zip_block.format_map(zip_block_dict)
                block_reply_dict = {"reply":parse_reply(v["reply"]),"zips":inner_zip_block}
        
        content = block_link.format_map(block_link_dict)+"\n"+block_reply.format_map(block_reply_dict)

        block_dict = {"block_num":i+1,"block_text":content}
        img_blocks += "\n" + block.format_map(block_dict)

    for resti,restv in enumerate(no_img_block):
        # Reset for each zip-only post so links from earlier posts do not leak in
        # (and so the variable is defined even when there were no image posts).
        inner_zip_block = ""
        zip_block_dict = {"link":restv["url"][0],"name":restv["url"][0].removeprefix("other/")}
        inner_zip_block += zip_block.format_map(zip_block_dict)
        block_reply_dict = {"reply":parse_reply(restv["reply"]),"zips":inner_zip_block}
        content = block_reply_noimg.format_map(block_reply_dict)
        block_dict = {"block_num":len(data["imgs"])+resti+1,"block_text":content}
        img_blocks += "\n" + block.format_map(block_dict)
    

    with open(os.path.join(os.getcwd(),"origin.html"),"r",encoding="utf-8") as origin:
        origin_text = origin.read()
    if os.path.exists(os.path.join(root,"index.html")):
        os.rename(os.path.join(root,"index.html"),os.path.join(root,"old.html"))
    with open(os.path.join(root,"index.html"),"w",encoding="utf-8") as dump:
        dump_text = origin_text.replace("#header_block#",header_block)
        dump_text = dump_text.replace("#img_blocks#",img_blocks)
        dump_text = dump_text.replace("#zip_blocks#",zip_blocks)
        dump.write(dump_text)
    print("Dumped!")
    print("Checking whether index.html changed")
    if os.path.exists(os.path.join(root,"old.html")):
        with open(os.path.join(root,"old.html"),"rb") as oldf:
            olddata = oldf.read()
        with open(os.path.join(root,"index.html"),"rb") as newf:
            newdata = newf.read()
        oldhash = hashlib.sha256(olddata).hexdigest()
        newhash = hashlib.sha256(newdata).hexdigest()
        print(oldhash)
        print(newhash)
        if oldhash == newhash:
            os.remove(os.path.join(root,"old.html"))
        else:
            print("Old file has been preserved")



def _main():
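    # With no argument the script targets yesterday's thread (dates computed in UTC+8;
    # pytz's 'Etc/GMT-8' means UTC+8 under the POSIX sign convention). argv[1] may give a
    # non-negative index into the search results instead. The download directory is named
    # after that date.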

    index = -1
    if len(sys.argv) > 1 and sys.argv[1].isdigit():
        index = int(sys.argv[1])
    theday = datetime.datetime.fromtimestamp(time.time(), tz=pytz.timezone('Etc/GMT-8'))
    if index == -1:
        theday = theday - datetime.timedelta(days=1)
    root = os.path.join(os.getcwd(),datetime.datetime.strftime(theday,'%Y_%m_%d'))
    mkdir(root)
    try:
        src = get_src(index,day=theday)
    except AssertionError as e:
        raise AssertionError("Get Src Failed") from e

    try:
        data = get_urls_data(src)
    except AssertionError as e:
        raise AssertionError("Get URL Data Failed") from e


    asyncio.run(fetch(data,src,root))
    dump_html(data,root)

if __name__ == "__main__":
    print(os.getcwd())
    _main()