import scrapy
from scrapy.crawler import CrawlerProcess
class healthSpider(scrapy.Spider):
    """Scrape the people/profile cards from svhealthinvestors.com/people.

    Yields one dict per profile containing the person's name, position,
    location, sector and fund.
    """

    name = "health"
    start_urls = ["https://svhealthinvestors.com/people"]

    def parse(self, response):
        """Yield one item per profile <li> found on the people page."""
        # Each <li> under the people section is a single person card.
        for card in response.css("main>div>section.people>ul.container>li"):
            # Scrapy CSS selectors are descendant-or-self, so "li>a..."
            # matches anchors that are direct children of the card itself.
            # .get() returns None (instead of raising) when a field is absent.
            yield {
                "name": card.css("li>a.person-card>strong::text").get(),
                "position": card.css("li>a.person-card>span.person-card-role::text").get(),
                # location/sector/fund are data-* attributes on the <li>
                # itself, so read them straight off the card selector.
                # .get() keeps missing attributes tolerant (None) instead of
                # raising KeyError, matching the text fields above.
                "location": card.attrib.get("data-location"),
                "sector": card.attrib.get("data-sector"),
                "fund": card.attrib.get("data-fund"),
            }
# Run the spider standalone and export every scraped item to output.xml
# in the working directory. The __main__ guard prevents the crawl from
# starting as a side effect if this module is ever imported.
if __name__ == "__main__":
    health_process = CrawlerProcess(
        settings={
            # FEED_FORMAT / FEED_URI are deprecated since Scrapy 2.1;
            # FEEDS is the supported replacement and is equivalent here.
            "FEEDS": {
                "output.xml": {"format": "xml"},
            },
        }
    )
    health_process.crawl(healthSpider)
    health_process.start()  # blocks until the crawl finishes