```r
library("ralger")
url <- "http://www.shanghairanking.com/rankings/arwu/2021"
# retrieve HTML and select elements using CSS selectors:
best_uni <- scrap(link = url, node = "a span", clean = TRUE)
head(best_uni, 5)
#> [1] "Harvard University"
#> [2] "Stanford University"
#> [3] "University of Cambridge"
#> [4] "Massachusetts Institute of Technology (MIT)"
#> [5] "University of California, Berkeley"
# ralger can also parse HTML attributes
attributes <- attribute_scrap(
link = "https://ropensci.org/",
node = "a", # the a tag
attr = "class" # getting the class attribute
)
head(attributes, 10) # NA values are a tags without a class attribute
#> [1] "navbar-brand logo" "nav-link" NA
#> [4] NA NA "nav-link"
#> [7] NA "nav-link" NA
#> [10] NA
#
# ralger can automatically scrape tables:
data <- table_scrap(link ="https://www.boxofficemojo.com/chart/top_lifetime_gross/?area=XWW")
head(data)
#> # A tibble: 6 × 4
#> Rank Title `Lifetime Gross` Year
#>
#> 1 1 Avatar $2,847,397,339 2009
#> 2 2 Avengers: Endgame $2,797,501,328 2019
#> 3 3 Titanic $2,201,647,264 1997
#> 4 4 Star Wars: Episode VII - The Force Awakens $2,069,521,700 2015
#> 5 5 Avengers: Infinity War $2,048,359,754 2018
#> 6 6 Spider-Man: No Way Home $1,901,216,740 2021
```
```go
package main
import (
"fmt"
"log"
"github.com/anaskhan96/soup"
)
func main() {
url := "https://www.bing.com/search?q=weather+Toronto"
# soup has basic HTTP client though it's not recommended for scraping:
resp, err := soup.Get(url)
if err != nil {
log.Fatal(err)
}
# create soup object from HTML
doc := soup.HTMLParse(resp)
# html elements can be found using Find or FindStrict methods:
# in this case find
elements where "class" attribute matches some values:
grid := doc.FindStrict("div", "class", "b_antiTopBleed b_antiSideBleed b_antiBottomBleed")
# note: to find all elements FindAll() method can be used the same way
# elements can be further searched for descendents:
heading := grid.Find("div", "class", "wtr_titleCtrn").Find("div").Text()
conditions := grid.Find("div", "class", "wtr_condition")
primaryCondition := conditions.Find("div")
secondaryCondition := primaryCondition.FindNextElementSibling()
temp := primaryCondition.Find("div", "class", "wtr_condiTemp").Find("div").Text()
others := primaryCondition.Find("div", "class", "wtr_condiAttribs").FindAll("div")
caption := secondaryCondition.Find("div").Text()
fmt.Println("City Name : " + heading)
fmt.Println("Temperature : " + temp + "˚C")
for _, i := range others {
fmt.Println(i.Text())
}
fmt.Println(caption)
}
```