You have no items in your shopping cart.

Monday, November 25, 2024 1:10:25 PM

Rss support

Posted: 9 years ago

#583 Quote

fabio.parigi

Posted: 9 years ago

#583 Quote
P.S. Yes, I want to automatically search for new articles from a pre-filled list of RSS feeds.

Posted: 9 years ago

#584 Quote

Support

Posted: 9 years ago

#584 Quote
OK, I understand. I'll read the first image from the description.

Posted: 9 years ago

#588 Quote

Support

Posted: 9 years ago

#588 Quote
If you have code for getting the first image from the HTML description, please send it to me.
Thank you.  

Posted: 9 years ago

#1588 Quote

fabio.parigi

Posted: 9 years ago

#1588 Quote
This is my class. The method you need to retrieve the image is GetFirstImageUrl(string)


[code]
using System;
using System.Collections.Generic;
using System.Linq;
using System.Text;
using RSSImporter.Model;
using System.ServiceModel.Syndication;
using System.Xml;
using mshtml;
using System.Net;
using System.Drawing;
using System.IO;

namespace RSSImporter
{
    public class Importer
    {
        private SharedLibrary.ILogger Logger
        {
            get { return SharedLibrary.LoggerManager.GetLogger(RSSImporter); }
        }

        public void ReadAll()
        {
            int siteCounter = 0;
            int totalArticleCounter = 0;
            foreach (RSSProvider site in DBContext.RSSProviders)
            {
                try
                {
                    XmlReader reader = XmlReader.Create(site.RssUrl);
                    SyndicationFeed feed = SyndicationFeed.Load(reader);

                    Logger.Info(string.Format("Start reading feed {0}.", site.Name));

                    int articleCounter = 0;
                    foreach (SyndicationItem item in feed.Items)
                    {
                        if (!site.Hidden && (site.DontFilter || IsRilevant(item)))
                        {
                            string url = item.Links.First().Uri.AbsoluteUri;
              
              // Look if I've already downloaded it before...
                            FeedArticle news = site.FeedArticles.SingleOrDefault(no => no.Url.Equals(url));
              
              // ... If Not
                            if (news == null)
                            {
                                articleCounter++;
                                totalArticleCounter++;

                                news = new FeedArticle();
                                news.Site = site;
                                news.Title = item.Title.Text;
                                news.DateTime = item.PublishDate.DateTime;
                                news.Text = item.Summary.Text;
                                news.Url = url;
                                var textContent = (item.Content as TextSyndicationContent);
                                if (textContent != null)
                                {
                                    news.ImageUrl = GetFirstImageUrl(textContent.Text);
                                }

                                site.FeedArticles.Add(news);
                            }
                        }
                    }

                    Logger.Info(string.Format("End reading feed {0}. Imported {1} new articles", site.Name, articleCounter));

                    siteCounter++;
                }
                catch (Exception ex)
                {
                    Logger.ErrorException(string.Format("Error reading feed {0}.", site.Name), ex);
                }

            }

            DBContext.SaveChanges();
            Logger.Info(string.Format("End feed reading process. Read {0} articles from {1} feeds.", totalArticleCounter, siteCounter));
        }

        public static string GetFirstImageUrl(string htmlContent)
        {
            if (string.IsNullOrEmpty(htmlContent)) return null;

            HTMLDocument doc = new HTMLDocument();
            IHTMLDocument2 doc2 = (doc as IHTMLDocument2);
            doc2.write(htmlContent as object);

            IHTMLElementCollection imgs = doc.getElementsByTagName("img");
            int i = 0;
            while (true)
            {
                IHTMLElement img = imgs.item(i);
                if (img != null)
                {
                    string ImageUrl = img.getAttribute("src") as string;
                    Stream str = null;
                    HttpWebRequest wReq = (HttpWebRequest)WebRequest.Create(ImageUrl);
                    HttpWebResponse wRes = (HttpWebResponse)(wReq).GetResponse();
                    str

Posted: 9 years ago

#1589 Quote

fabio.parigi

Posted: 9 years ago

#1589 Quote
The message has been truncated without any warning... Here is the end of the class:


/// <summary>
        /// Returns the URL of the first &lt;img&gt; in the given HTML whose
        /// downloaded picture is larger than 50x50 pixels.
        /// </summary>
        /// <param name="htmlContent">HTML fragment to scan; may be null or empty.</param>
        /// <returns>
        /// The value of the matching image's src attribute, or null when the
        /// input is null/empty or no image is large enough.
        /// </returns>
        public static string GetFirstImageUrl(string htmlContent)
        {
            if (string.IsNullOrEmpty(htmlContent)) return null;

            // Both width and height must exceed this many pixels; smaller images
            // are skipped as unlikely to be the article picture.
            const int MinImageSize = 50;

            HTMLDocument doc = new HTMLDocument();
            IHTMLDocument2 doc2 = (doc as IHTMLDocument2);
            doc2.write(htmlContent as object);

            IHTMLElementCollection imgs = doc.getElementsByTagName("img");
            for (int i = 0; ; i++)
            {
                // item(i) yields null once the index runs past the last element.
                IHTMLElement img = imgs.item(i);
                if (img == null)
                {
                    break;
                }

                string imageUrl = img.getAttribute("src") as string;
                if (string.IsNullOrEmpty(imageUrl))
                {
                    // No usable src: nothing to download, try the next image.
                    continue;
                }

                HttpWebRequest wReq = (HttpWebRequest)WebRequest.Create(imageUrl);
                // Dispose response, stream and image on every iteration; otherwise
                // each probed image leaks a connection and a GDI+ handle.
                using (HttpWebResponse wRes = (HttpWebResponse)wReq.GetResponse())
                using (Stream str = wRes.GetResponseStream())
                using (System.Drawing.Image imageOrig = System.Drawing.Image.FromStream(str))
                {
                    if (imageOrig.Height > MinImageSize && imageOrig.Width > MinImageSize)
                    {
                        return imageUrl;
                    }
                }
            }
            return null;
        }

        public static bool IsRilevant(SyndicationItem item)
        {
            return IsRilevant(item.Title) || IsRilevant(item.Summary) || IsRilevant(item.Content as TextSyndicationContent);
        }
        public static bool IsRilevant(TextSyndicationContent textContent)
        {
            if (textContent == null) return false;
            else return IsRilevant(textContent.Text);
        }
        public static bool IsRilevant(string text)
        {
            if (text == null) return false;
            return text.ToLower().Contains("keyword");
        }
    }
}

Posted: 9 years ago

#1601 Quote

Support

Posted: 9 years ago

#1601 Quote
I sent the file by email.

Posted: 9 years ago

#1602 Quote

fabio.parigi

Posted: 9 years ago

#1602 Quote
Support wrote:
you will have the problem with images.
I tested imports and ...
The first image in the RSS message is the left corner of the CSS frame...


The GetFirstImageUrl method I post checks if the image is bigger than 50x50, is that left corner so big??
You can increase size limit if you think it's too low.

Posted: 9 years ago

#1603 Quote

Support

Posted: 9 years ago

#1603 Quote
No, I used another method. But you are right... I must load the image to disk and check it.

Powered by nopCommerce

Copyright © 2023 FoxNetSoft. All rights reserved