Site scraper

RickGAE

New member
Jan 22, 2016
8
0
0
The first website works successfully in the code. Only the second one does not work yet: I can't find the video for Playvid in the HTML code of the source site. Who can help me?

I have now this:
Code:
http://sterren.avrotros.nl/video-s/
Code:
import urllib, urllib2, re, cookielib, os, sys, socket
import xbmc, xbmcplugin, xbmcgui, xbmcaddon

import utils, sqlite3


def Main():
    """Build the add-on's root directory.

    Adds one folder entry per video overview page (all handled by mode 227)
    and then closes the directory listing.
    """
    # (label, overview URL) pairs, in the order they are shown.
    sections = (
        ('Nieuwste uitzendingen',
         'http://sterren.avrotros.nl/video-s/meer-uitzendingen/'),
        ('Nieuwste clips',
         'http://sterren.avrotros.nl/video-s/nieuwe-videoclips-overzicht/'),
        ('De Zomer Voorbij',
         'http://sterren.avrotros.nl/programma-s/tv-pips/de-zomer-voorbij/dezomervoorbij-tv-videos1/video-overzicht/'),
        ('Specials',
         'http://sterren.avrotros.nl/video-s/video-specials/'),
        ('De Beste Zangers van Nederland',
         'http://sterren.avrotros.nl/programma-s/tv-pips/de-beste-zangers-van-nl/debestezangersvannl-tv-videos/alle-video-s/'),
        ('Sterrenparade clips',
         'http://sterren.avrotros.nl/programma-s/tv-pips/sterrenparade/sterrenparade-tv-videos/overzicht-sterrenparade-clips/'),
        ('TROS Muziekfeest',
         'http://sterren.avrotros.nl/programma-s/tv-pips/sterren-muziekfeest/sterrenmuziekfeest-tv-videos1/optredens-muziekfeest-op-het-plein/'),
        ('Op Volle Toeren',
         'http://sterren.avrotros.nl/video-s/meer-clips-op-volle-toeren/'),
    )
    for label, overview_url in sections:
        utils.addDir(label, overview_url, 227, '', '')
    xbmcplugin.endOfDirectory(utils.addon_handle)


def List(url, page=None):
    """List the videos of an overview page and add a next-page entry.

    Downloads ``url`` through ``utils.getHtml2``, adds one playable entry
    (mode 228) per video item found in the HTML and, when the page contains
    a link to a following page, a 'Volgende Pagina' folder entry (mode 227).

    Args:
        url: Address of the overview page to scrape.
        page: Number of the page being shown. ``None`` or any value that
            cannot be converted to an integer is treated as the first page.
    """
    listhtml = utils.getHtml2(url)

    video_re = re.compile(
        r'<a class="rounded-img" href="(.+?)">.+?<img src="(.+?)" alt=".+? />.+?<span class="video-item-title" title="(.+?)">.+?<span class="video-item-subtitle" title="(.+?)">',
        re.DOTALL | re.IGNORECASE)
    for videopage, img, name, subname in video_re.findall(listhtml):
        name = utils.cleantext(name) + " - " + subname
        # The scraped hrefs are site-relative, so prefix the host.
        videopage = "http://sterren.avrotros.nl" + videopage
        utils.addDownLink(name, videopage, 228, img, '')

    # The default page (None) used to raise a TypeError on "page + 1" that a
    # bare "except" swallowed, so the next-page entry was never added on the
    # first listing. Fall back to page 1 instead.
    try:
        current_page = int(page)
    except (TypeError, ValueError):
        current_page = 1

    next_links = re.compile(
        'href="([^"]+)"> Vol', re.DOTALL | re.IGNORECASE).findall(listhtml)
    if next_links:
        next_page = current_page + 1
        # Links inside HTML escape "&" as "&amp;"; undo that so the query
        # string is valid when the URL is requested.
        # NOTE(review): this host has a "www." prefix while the video links
        # above do not -- confirm which one the site expects.
        next_url = ("http://www.sterren.avrotros.nl"
                    + next_links[0].replace("&amp;", "&"))
        utils.addDir('Volgende Pagina (' + str(next_page) + ')',
                     next_url, 227, '', next_page)

    xbmcplugin.endOfDirectory(utils.addon_handle)

def Playvid(url, name):
    """Resolve and play the video embedded in a video page.

    Downloads ``url`` through ``utils.getHtml2`` and looks for the ``src``
    attribute of a ``<video>`` tag. When none is found the user is notified;
    otherwise the stream is either played directly through a fresh video
    playlist (when the plugin handle in ``sys.argv[1]`` is -1) or handed
    back via ``setResolvedUrl``.

    Args:
        url: Address of the page that embeds the video.
        name: Title shown for the item being played.
    """
    page_html = utils.getHtml2(url)
    sources = re.compile(
        '<video src="([^"]+)"', re.DOTALL | re.IGNORECASE).findall(page_html)

    # Guard clause: nothing to play on this page.
    if not sources:
        utils.notify('Oh oh','Couldn\'t find a playable video link')
        return

    # Spaces are not valid in a URL; percent-encode them.
    stream_url = sources[0].replace(" ", "%20")

    thumb = xbmc.getInfoImage("ListItem.Thumb")
    item = xbmcgui.ListItem(
        name, iconImage="DefaultVideo.png", thumbnailImage=thumb)
    item.setInfo('video', {'Title': name, 'Genre': 'Music'})
    item.setProperty("IsPlayable", "true")

    if int(sys.argv[1]) != -1:
        # Regular plugin call: hand the resolved path back.
        item.setPath(str(stream_url))
        xbmcplugin.setResolvedUrl(utils.addon_handle, True, item)
        return

    # Handle of -1: play directly through a cleared video playlist.
    playlist = xbmc.PlayList(xbmc.PLAYLIST_VIDEO)
    playlist.clear()
    playlist.add(stream_url, item)
    xbmc.Player().play(playlist)
 

nickat

Developer
Mar 17, 2015
6
0
0
In the regexp of the Playvid function you are searching for strings containing '<video', but there is no such HTML tag on the video pages of the site. It looks like the Flash video player is embedded into the page with JavaScript (look at the code after the line '<div class="block merge"><h3 class="block-header">Videoplayer</h3><div id="fb-root"></div><script>'). Now the question is: how do you get from that JavaScript code to a resolved link to the video file? Unfortunately, I have no idea how to do that.