Geocoding French addresses with banR

Paul-Antoine Chevalier (Etalab), Joël Gombin (Datactivist)

2025-11-18

library("tibble")
library("dplyr")
## 
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
## 
##     filter, lag
## The following objects are masked from 'package:base':
## 
##     intersect, setdiff, setequal, union
library("banR")

# Sample input for the data-frame examples: three Paris street addresses,
# their postal codes, and an extra numeric column `z` of random normal draws.
table_test <- tibble(
  adress = c(
    "39 quai André Citroën",
    "64 Allée de Bercy",
    "20 avenue de Ségur"
  ),
  postal_code = c("75015", "75012", "75007"),
  z = rnorm(n = 3)
)

Geocode

Geocoding is the process of transforming a human-readable address into a location (i.e. a pair of latitude and longitude coordinates).

A single address

# Geocode one free-text address and show every column of the result.
glimpse(geocode(query = "39 quai André Citroën, Paris"))
Rows: 1
Columns: 19
$ label       <chr> "39 Quai André Citroën 75015 Paris"
$ score       <dbl> 0.9801645
$ housenumber <chr> "39"
$ id          <chr> "75115_0318_00039"
$ name        <chr> "39 Quai André Citroën"
$ postcode    <chr> "75015"
$ citycode    <chr> "75115"
$ x           <dbl> 647082.8
$ y           <dbl> 6861010
$ city        <chr> "Paris"
$ district    <chr> "Paris 15e Arrondissement"
$ context     <chr> "75, Paris, Île-de-France"
$ type        <chr> "housenumber"
$ importance  <dbl> 0.78181
$ street      <chr> "Quai André Citroën"
$ `_type`     <chr> "address"
$ type_geo    <chr> "Point"
$ longitude   <dbl> 2.278922
$ latitude    <dbl> 48.84696

The BAN API returns both projected (Cartesian) coordinates (the x and y columns, in the Lambert 93 projection, also known as EPSG:2154) and longitude/latitude (i.e. WGS84) coordinates (the longitude and latitude columns). It also indicates its degree of confidence in each result (the score column). The example above returns only one result, but the API sometimes returns several suggestions for the same query; they are sorted by decreasing confidence.

A data frame

In addition to the address, geocode_tbl() can take as an argument either the postal code or the official French code (INSEE code) of the commune.

# Geocode every row of `table_test`, using only the `adress` column.
glimpse(geocode_tbl(tbl = table_test, adresse = adress))
Rows: 3
Columns: 21
$ postal_code        <chr> "75015", "75012", "75007"
$ z                  <dbl> -1.316912, 2.787876, 1.313769
$ adress             <chr> "39 quai André Citroën", "64 Allée de Bercy", "20 a…
$ longitude          <dbl> 2.278922, 2.376011, 2.308628
$ latitude           <dbl> 48.84696, 48.84254, 48.85070
$ result_score       <dbl> 0.9801645, 0.9729327, 0.9716455
$ result_score_next  <chr> "0.8013699999999999", NA, "0.9532445454545454"
$ result_label       <chr> "39 Quai André Citroën 75015 Paris", "64 Allée de B…
$ result_type        <chr> "housenumber", "housenumber", "housenumber"
$ result_id          <chr> "75115_0318_00039", "75112_0874_00064", "75107_8909…
$ result_housenumber <chr> "39", "64", "20"
$ result_name        <chr> "39 Quai André Citroën", "64 Allée de Bercy", "20 A…
$ result_street      <chr> "Quai André Citroën", "Allée de Bercy", "Avenue de …
$ result_postcode    <chr> "75015", "75012", "75007"
$ result_city        <chr> "Paris", "Paris", "Paris"
$ result_context     <chr> "75, Paris, Île-de-France", "75, Paris, Île-de-Fran…
$ result_citycode    <chr> "75115", "75112", "75107"
$ result_oldcitycode <chr> NA, NA, NA
$ result_oldcity     <chr> NA, NA, NA
$ result_district    <chr> "Paris 15e Arrondissement", "Paris 12e Arrondisseme…
$ result_status      <chr> "ok", "ok", "ok"
# Geocode every row of `table_test`, passing the postal code column along
# with the address column.
glimpse(
  geocode_tbl(
    tbl = table_test,
    adresse = adress,
    code_postal = postal_code
  )
)
Rows: 3
Columns: 21
$ z                  <dbl> -0.1769586, 1.2007049, -0.3180847
$ adress             <chr> "39 quai André Citroën", "64 Allée de Bercy", "20 a…
$ postal_code        <chr> "75015", "75012", "75007"
$ longitude          <dbl> 2.278922, 2.376011, 2.308628
$ latitude           <dbl> 48.84696, 48.84254, 48.85070
$ result_score       <dbl> 0.9801645, 0.9729327, 0.9716455
$ result_score_next  <chr> NA, "0.600363971291866", "0.38506487603305783"
$ result_label       <chr> "39 Quai André Citroën 75015 Paris", "64 Allée de B…
$ result_type        <chr> "housenumber", "housenumber", "housenumber"
$ result_id          <chr> "75115_0318_00039", "75112_0874_00064", "75107_8909…
$ result_housenumber <chr> "39", "64", "20"
$ result_name        <chr> "39 Quai André Citroën", "64 Allée de Bercy", "20 A…
$ result_street      <chr> "Quai André Citroën", "Allée de Bercy", "Avenue de …
$ result_postcode    <chr> "75015", "75012", "75007"
$ result_city        <chr> "Paris", "Paris", "Paris"
$ result_context     <chr> "75, Paris, Île-de-France", "75, Paris, Île-de-Fran…
$ result_citycode    <chr> "75115", "75112", "75107"
$ result_oldcitycode <chr> NA, NA, NA
$ result_oldcity     <chr> NA, NA, NA
$ result_district    <chr> "Paris 15e Arrondissement", "Paris 12e Arrondisseme…
$ result_status      <chr> "ok", "ok", "ok"
# Load the `paris2012` example dataset (presumably shipped with banR; confirm).
data("paris2012")
# Geocode the first 100 rows, passing an INSEE commune code with each address.
paris2012 %>%
  slice(1:100) %>%
  mutate(
    # Build the full street address from its three parts,
    # e.g. "4 RUE DE L ABBAYE".
    adresse = paste(numero, voie, nom),
    # Prefix the two-character arrondissement with "751" to form the
    # INSEE code, e.g. "06" -> "75106".
    code_insee = paste0("751", arrondissement)
    ) %>%
  geocode_tbl(adresse = adresse, code_insee = code_insee) %>%
  glimpse()
Rows: 100
Columns: 27
$ arrondissement     <chr> "06", "06", "06", "06", "06", "06", "06", "06", "06…
$ bureau             <chr> "09", "09", "09", "09", "09", "09", "09", "09", "09…
$ numero             <int> 4, 5, 6, 7, 8, 11, 12, 13, 14, 16, 3, 4, 5, 6, 7, 8…
$ voie               <chr> "RUE DE L", "RUE DE L", "RUE DE L", "RUE DE L", "RU…
$ nom                <chr> "ABBAYE", "ABBAYE", "ABBAYE", "ABBAYE", "ABBAYE", "…
$ nb                 <int> 1, 1, 20, 2, 17, 2, 9, 15, 17, 8, 13, 6, 6, 3, 9, 1…
$ ID                 <chr> "0609", "0609", "0609", "0609", "0609", "0609", "06…
$ adresse            <chr> "4 RUE DE L ABBAYE", "5 RUE DE L ABBAYE", "6 RUE DE…
$ code_insee         <chr> "75106", "75106", "75106", "75106", "75106", "75106…
$ longitude          <dbl> 2.335715, 2.335172, 2.335352, 2.335041, 2.334903, 2…
$ latitude           <dbl> 48.85405, 48.85407, 48.85414, 48.85410, 48.85425, 4…
$ result_score       <dbl> 0.9663627, 0.9663627, 0.9663627, 0.9663627, 0.96636…
$ result_score_next  <chr> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA,…
$ result_label       <chr> "4 Rue de l'Abbaye 75006 Paris", "5 Rue de l'Abbaye…
$ result_type        <chr> "housenumber", "housenumber", "housenumber", "house…
$ result_id          <chr> "75106_0002_00004", "75106_0002_00005", "75106_0002…
$ result_housenumber <chr> "4", "5", "6", "7", "8", "11", "12", "13", "14", "1…
$ result_name        <chr> "4 Rue de l'Abbaye", "5 Rue de l'Abbaye", "6 Rue de…
$ result_street      <chr> "Rue de l'Abbaye", "Rue de l'Abbaye", "Rue de l'Abb…
$ result_postcode    <chr> "75006", "75006", "75006", "75006", "75006", "75006…
$ result_city        <chr> "Paris", "Paris", "Paris", "Paris", "Paris", "Paris…
$ result_context     <chr> "75, Paris, Île-de-France", "75, Paris, Île-de-Fran…
$ result_citycode    <chr> "75106", "75106", "75106", "75106", "75106", "75106…
$ result_oldcitycode <chr> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA,…
$ result_oldcity     <chr> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA,…
$ result_district    <chr> "Paris 6e Arrondissement", "Paris 6e Arrondissement…
$ result_status      <chr> "ok", "ok", "ok", "ok", "ok", "ok", "ok", "ok", "ok…

Reverse geocode

Reverse geocoding is the process of converting a point location (latitude, longitude) back into a human-readable address.

A single address

reverse_geocode() takes longitude and latitude as arguments and returns a data frame with addresses.

# Reverse geocode one longitude/latitude pair and show the returned addresses.
glimpse(reverse_geocode(long = 2.279092, lat = 48.84683))
Rows: 10
Columns: 23
$ type        <chr> "housenumber", "housenumber", "housenumber", "housenumber"…
$ name        <chr> "39a Quai André Citroën", "39 Quai André Citroën", "43a Qu…
$ label       <chr> "39a Quai André Citroën 75015 Paris", "39 Quai André Citro…
$ street      <chr> "Quai André Citroën", "Quai André Citroën", "Quai André Ci…
$ postcode    <chr> "75015", "75015", "75015", "75015", "75015", "75015", "750…
$ citycode    <chr> "75115", "75115", "75115", "75115", "75115", "75115", "751…
$ city        <chr> "Paris", "Paris", "Paris", "Paris", "Paris", "Paris", "Par…
$ oldcitycode <lgl> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA
$ oldcity     <lgl> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA
$ district    <chr> "Paris 15e Arrondissement", "Paris 15e Arrondissement", "P…
$ context     <chr> "75, Paris, Île-de-France", "75, Paris, Île-de-France", "7…
$ importance  <dbl> 0.78181, 0.78181, 0.78181, 0.78181, 0.82311, 0.78181, 0.82…
$ housenumber <chr> "39a", "39", "43a", "41", "8a", "37", "8", "10", "43", "12"
$ id          <chr> "75115_0318_00039_a", "75115_0318_00039", "75115_0318_0004…
$ banId       <lgl> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA
$ x           <dbl> 647094.3, 647082.8, 647087.1, 647071.8, 647110.9, 647095.8…
$ y           <dbl> 6860995, 6861010, 6860976, 6860999, 6861016, 6861024, 6861…
$ distance    <int> 1, 19, 21, 24, 26, 29, 30, 32, 36, 40
$ score       <dbl> 0.9999, 0.9981, 0.9979, 0.9976, 0.9974, 0.9971, 0.9970, 0.…
$ `_type`     <chr> "address", "address", "address", "address", "address", "ad…
$ type_geo    <chr> "Point", "Point", "Point", "Point", "Point", "Point", "Poi…
$ longitude   <dbl> 2.279081, 2.278922, 2.278985, 2.278774, 2.279305, 2.279098…
$ latitude    <dbl> 48.84683, 48.84696, 48.84665, 48.84686, 48.84701, 48.84709…

A data frame

reverse_geocode_tbl() takes a data frame and the names of its longitude and latitude columns, and returns a data frame with addresses.

# Ten random points: distinct lowercase letters as labels, with longitude
# drawn uniformly from [2.19, 2.47] and latitude from [48.8, 48.9].
test_df <- tibble(
  nom = sample(letters, size = 10),
  lon = runif(n = 10, min = 2.19, max = 2.47),
  lat = runif(n = 10, min = 48.8, max = 48.9)
)

# Reverse geocode every row of `test_df`, giving its longitude and latitude
# columns, then show the result.
glimpse(reverse_geocode_tbl(test_df, lon, lat))
Rows: 10
Columns: 20
$ nom                <chr> "p", "n", "i", "j", "f", "c", "o", "u", "q", "a"
$ longitude          <dbl> 2.209505, 2.245517, 2.286644, 2.382297, 2.193622, 2…
$ latitude           <dbl> 48.82198, 48.89566, 48.85378, 48.83758, 48.88390, 4…
$ result_longitude   <dbl> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA
$ result_latitude    <dbl> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA
$ result_distance    <int> 14, 10, 46, 4, 35, NA, 4, 8, 45, 3
$ result_label       <chr> "Rue Léon Cladel 92310 Sèvres", "71 Rue Segoffin 92…
$ result_type        <chr> "street", "housenumber", "housenumber", "housenumbe…
$ result_id          <chr> "92072_1130", "92026_8535_00071", "75115_4313_00016…
$ result_housenumber <chr> NA, "71", "16a", "55", "69", NA, "203", "52", "39",…
$ result_name        <chr> "Rue Léon Cladel", "71 Rue Segoffin", "16a Quai de …
$ result_street      <chr> NA, "Rue Segoffin", "Quai de Grenelle", "Rue de Ber…
$ result_postcode    <chr> "92310", "92400", "75015", "75012", "92000", NA, "7…
$ result_city        <chr> "Sèvres", "Courbevoie", "Paris", "Paris", "Nanterre…
$ result_context     <chr> "92, Hauts-de-Seine, Île-de-France", "92, Hauts-de-…
$ result_citycode    <chr> "92072", "92026", "75115", "75112", "92050", NA, "7…
$ result_oldcitycode <lgl> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA
$ result_oldcity     <lgl> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA
$ result_district    <chr> NA, NA, "Paris 15e Arrondissement", "Paris 12e Arro…
$ result_status      <chr> "ok", "ok", "ok", "ok", "ok", "not-found", "ok", "o…