<?php
/**
 * ScrapeRSS.class.php
 * 
 * @version  0.2
 * @author   Paul Bissex <pb@e-scribe.com>
 * @license  LGPL
 *
 * Usage:
 *
 * $myFeed = new ScrapeRSS ("http://example.com/headlines/");
 * $myFeed->title = "Example.com Headlines";
 * $myFeed->site_url = "http://example.com/";
 * $myFeed->link_base = "http://example.com/shownews/";
 * $myFeed->regex = '/<a href="(.+)">(.+)<\/a>/i';
 * $myFeed->description = "News about examples from around the world";
 * $myFeed->send_rss();
 *
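 * Regex format: the pattern is applied to each line of the scraped page;
 *       capture group 1 is the link URL (appended to link_base) and
 *       capture group 2 is the headline text.
 *
 * TODO: add description fetching?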
 */

/**
 * Scrapes a web page for headline links and serves them as an RSS 0.91 feed.
 *
 * The page at the scrape URL is read line by line; every line that matches
 * $regex becomes one feed item. Capture group 1 of the regex is the link
 * (appended to $link_base) and capture group 2 is the headline.
 */
class ScrapeRSS
{
    /** URL (or local path) of the page that is scanned for headlines. */
    public $_scrape_url;

    /** Channel title written to the feed. */
    public $title;

    /** Channel link written to the feed; defaults to the scrape URL. */
    public $site_url;

    /** Prefix prepended to every scraped link; defaults to the scrape URL. */
    public $link_base;

    /** PCRE pattern applied to each line: group 1 = link, group 2 = headline. */
    public $regex;

    /** Channel description written to the feed. */
    public $description;

    /** Channel language code written to the feed. */
    public $language = "en";

    /**
     * Creates a scraper for the given page.
     *
     * @param string $url Page to scrape; also used as the initial site_url
     *                    and link_base.
     */
    public function __construct($url)
    {
        $this->_scrape_url = $url;
        $this->site_url = $url;
        $this->link_base = $url;
        $this->regex = '/<a href="(.+)">(.+)<\/a>/i';
    }

    /**
     * Legacy PHP 4 style constructor name, kept so existing code that calls
     * ScrapeRSS() explicitly (e.g. from a subclass) keeps working. PHP 8 no
     * longer treats this method as a constructor, hence __construct() above.
     *
     * @param string $url Page to scrape.
     */
    public function ScrapeRSS($url)
    {
        $this->__construct($url);
    }

    /**
     * Applies $regex to one line of text.
     *
     * @param string $text Line to examine.
     * @return array Index 0 is the match status (1 on a match, 0 otherwise),
     *               index 1 the link capture, index 2 the headline capture.
     *               Indexes 1 and 2 are always present ('' when not captured);
     *               any further capture groups follow under their own keys.
     */
    public function _linktext($text)
    {
        $matches = array();
        $status = preg_match($this->regex, $text, $matches);

        // preg_match() returns false on a PCRE error; treat that as "no match".
        $matched = ($status === 1);
        $results = array($matched ? 1 : 0, '', '');

        if ($matched) {
            foreach ($matches as $key => $match) {
                // Slot 0 holds the status flag, so the full-match text
                // must not overwrite it.
                if ($key === 0) {
                    continue;
                }
                $results[$key] = $match;
            }
        }

        return $results;
    }

    /**
     * Reads the scrape URL and collects one story per matching line.
     *
     * @return array List of stories, each an array with the keys 'url'
     *               (link_base plus the captured link), 'headline' and
     *               'body' (always an empty string). Empty when the page
     *               cannot be read or nothing matches.
     */
    public function _fetch()
    {
        $stories = array();

        // file() rejects an empty path, so bail out before calling it.
        if (!is_string($this->_scrape_url) || $this->_scrape_url === '') {
            return $stories;
        }

        $lines = file($this->_scrape_url);
        if ($lines === false) {
            // Unreadable page: serve an empty feed rather than iterating false.
            return $stories;
        }

        foreach ($lines as $line) {
            $parts = $this->_linktext($line);
            if (empty($parts[0])) {
                continue;
            }

            $stories[] = array(
                'url'      => $this->link_base . (isset($parts[1]) ? $parts[1] : ''),
                'headline' => isset($parts[2]) ? $parts[2] : '',
                'body'     => '',
            );
        }

        return $stories;
    }

    /**
     * Escapes a value for use as XML character data.
     *
     * Entities that are already present in the text (scraped HTML commonly
     * contains "&amp;") are left alone instead of being encoded twice, and
     * invalid UTF-8 sequences are substituted rather than blanking the string.
     *
     * @param mixed $text Value to escape; cast to string first.
     * @return string Escaped text.
     */
    protected function _xml_escape($text)
    {
        return htmlspecialchars((string) $text, ENT_QUOTES | ENT_SUBSTITUTE, 'UTF-8', false);
    }

    /**
     * Renders the channel header and the given stories as an RSS 0.91 document.
     *
     * @param array $stories Stories as produced by _fetch(); missing keys are
     *                       rendered as empty elements.
     * @return string The complete XML document.
     */
    public function _output_headlines_rss($stories)
    {
        /// concatenation workaround for BBEdit syntax coloring glitch ///
        $XML_TAG = "<" . "?xml version='1.0'?" . ">";

        $header = array(
            $XML_TAG,
            '<!-- name="generator" content="e-scribe.com ScrapeRSS.class.php/0.2" -->',
            '<!DOCTYPE rss PUBLIC "-//Netscape Communications//DTD RSS 0.91//EN" "http://my.netscape.com/publish/formats/rss-0.91.dtd">',
            '<rss version="0.91">',
            '<channel>',
            '<title>' . $this->_xml_escape($this->title) . '</title>',
            '<link>' . $this->_xml_escape($this->site_url) . '</link>',
            '<description>' . $this->_xml_escape($this->description) . '</description>',
            '<language>' . $this->_xml_escape($this->language) . '</language>',
        );
        $xml = implode("\n", $header);

        foreach ($stories as $story) {
            // Read the keys explicitly; extract() would let scraped data
            // overwrite local variables such as $xml.
            $headline = isset($story['headline']) ? $story['headline'] : '';
            $url = isset($story['url']) ? $story['url'] : '';
            $body = isset($story['body']) ? $story['body'] : '';

            $xml .= "\n<item><title>" . $this->_xml_escape($headline) . "</title>\n<link>";
            $xml .= $this->_xml_escape($url) . "</link>\n<description>";
            $xml .= $this->_xml_escape($body);
            $xml .= "</description>\n</item>\n";
        }

        $xml .= "</channel>\n</rss>";
        return $xml;
    }

    /**
     * Scrapes the page and sends the resulting feed to the client,
     * preceded by a text/xml Content-type header.
     *
     * @return void
     */
    public function send_rss()
    {
        $stories = $this->_fetch();
        $xml = $this->_output_headlines_rss($stories);

        header("Content-type: text/xml");
        print $xml;
    }
}


?>