ViewVC Help
View File | Revision Log | Show Annotations | View Changeset | Root Listing
root/ns_dev/Python/NinoCode/Active_prgs/Redgrave/vCardSearchAndDownload.py
Revision: 929
Committed: Thu Aug 7 20:33:40 2025 UTC (7 months, 2 weeks ago) by nino.borges
Content type: text/x-python
File size: 4679 byte(s)
Log Message:
general use changes

File Contents

# Content
1 """
2
3 vCardSearchAndDownload
4
5 Created by:
6 Emanuel Borges
7 12.30.2024
8
9 This program uses chromedriver to search for a person on a company's public people-search page and then, if the person is found, download that person's vCard.
10
11 """
12
13 import os
14 import time
15 from selenium import webdriver
16 from selenium.webdriver.common.by import By
17 from selenium.webdriver.common.keys import Keys
18 from urllib.parse import urljoin
19 import requests
20 import urllib
21
22
# Release number of this script.
version = '0.3.0'

# People-search page that the browser is pointed at.
# Other sites this script has been aimed at (swap in as needed):
#   https://perkinscoie.com/people-search
#   https://www.klgates.com/people
BASE_URL = "https://www.hoganlovells.com/en/our-people"

# Folder, relative to the working directory, that receives the downloaded
# .vcf files. It is created as soon as the module is loaded.
VCARD_DIR = "vcards"
os.makedirs(VCARD_DIR, exist_ok=True)

# A single Chrome session, opened when the module is loaded and shared by
# every call to search_and_download_vcards().
driver = webdriver.Chrome()
34
35
def search_and_download_vcards(person_name, element_id_name, quotedSearch=False, protectedDownload=False):
    """Search BASE_URL for a person and download each vCard linked in the results.

    The shared module-level ``driver`` loads BASE_URL, the query is typed into
    the element whose id is ``element_id_name`` and submitted with RETURN, and
    every distinct link whose href contains ``vcard`` is then downloaded.

    Args:
        person_name: Text to search for (a name or an email address). It is
            also used as the base name of the saved ``.vcf`` file.
        element_id_name: The HTML ``id`` of the site's search input box.
        quotedSearch: When true, the query is wrapped in double quotes before
            it is typed into the search box.
        protectedDownload: When true, files are fetched with
            ``download_vcard_protected`` instead of ``download_vcard``.

    Returns:
        None. Any exception raised while searching is caught and printed so
        that a batch run can carry on with the next person.
    """
    try:
        driver.get(BASE_URL)

        # Fixed pause so the page can finish rendering before the search box
        # is looked up.
        time.sleep(3)

        search_box = driver.find_element(By.ID, element_id_name)
        search_box.clear()
        query = f'"{person_name}"' if quotedSearch else person_name
        search_box.send_keys(query)
        search_box.send_keys(Keys.RETURN)

        # Fixed pause so the result list can load.
        time.sleep(3)

        links = driver.find_elements(By.CSS_SELECTOR, "a[href*='vcard']")

        # Read all hrefs first, then drop empty values and repeated links
        # (dict.fromkeys keeps the first occurrence and the original order).
        hrefs = [link.get_attribute("href") for link in links]
        vcard_urls = list(dict.fromkeys(url for url in hrefs if url))

        if not vcard_urls:
            print(f"No vCards found for '{person_name}'.")
            return

        download = download_vcard_protected if protectedDownload else download_vcard

        for position, vcard_url in enumerate(vcard_urls, start=1):
            # The first hit keeps the plain name; later hits get a numeric
            # suffix so they no longer overwrite the first file.
            save_as = person_name if position == 1 else f"{person_name}_{position}"
            download(vcard_url, save_as)

    except Exception as e:
        print(f"Error during search or download: {e}")
    # The driver is deliberately not quit here: it is the shared module-level
    # session and is reused by subsequent searches.
76
77
78
def download_vcard(vcard_url, person_name):
    """Download one vCard with ``requests`` and save it under VCARD_DIR.

    Args:
        vcard_url: URL of the vCard file.
        person_name: Base name for the saved file; the file is written as
            ``<person_name>.vcf`` inside VCARD_DIR.

    Returns:
        None. Failures (HTTP errors, timeouts, file errors) are caught and
        printed rather than raised, so one bad download does not stop a batch.
    """
    try:
        # Without a timeout, requests waits indefinitely on a stalled server,
        # which would freeze the whole run.
        response = requests.get(vcard_url, timeout=30)
        response.raise_for_status()

        filepath = os.path.join(VCARD_DIR, f"{person_name}.vcf")

        with open(filepath, "wb") as file:
            file.write(response.content)
        print(f"Downloaded vCard: {filepath}")
    except Exception as e:
        print(f"Error downloading vCard from {vcard_url}: {e}")
94
95
def download_vcard_protected(vcard_url, person_name):
    """Download one vCard with ``urllib`` while sending a browser user-agent.

    Some sites answer the plain ``requests`` download with a "forbidden"
    response, so this variant uses ``urllib.request`` and sets a Chrome
    user-agent header on the request.

    Args:
        vcard_url: URL of the vCard file.
        person_name: Base name for the saved file; the file is written as
            ``<person_name>.vcf`` inside VCARD_DIR.

    Returns:
        None. Failures are caught and printed rather than raised, so one bad
        download does not stop a batch.
    """
    # A bare ``import urllib`` does not guarantee that the ``request``
    # submodule is loaded, so import it explicitly where it is used.
    import urllib.request

    try:
        req = urllib.request.Request(vcard_url)
        req.add_header('user-agent','Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/131.0.0.0 Safari/537.36')

        # The context manager closes the HTTP response; the timeout keeps a
        # stalled server from freezing the whole run.
        with urllib.request.urlopen(req, timeout=30) as response:
            payload = response.read()

        filepath = os.path.join(VCARD_DIR, f"{person_name}.vcf")

        with open(filepath, "wb") as file:
            file.write(payload)

        print(f"Downloaded vCard: {filepath}")
    except Exception as e:
        print(f"Error downloading vCard from {vcard_url}: {e}")
113
if __name__ == '__main__':
    # Pipe-delimited input file, one person per line: <row number>|<email address>
    inputFilePath = r"C:\Users\eborges\OneDrive - Redgrave LLP\Documents\Cases\Amazon\MAL-NamesToFind\HOGANLOVELLS_SUB4.txt"

    # HTML id of the search box on the site configured in BASE_URL.
    element_id_name = "name3"

    ## This can be a list of any value to search on but for now I need it to be unique, which is why I'm searching only on email addresses
    listOfEmailAddresses = set()

    # For a one-off lookup, call search_and_download_vcards() directly with a
    # single name or email address instead of looping over the input file.
    try:
        # The file is opened with the platform default encoding, as before.
        with open(inputFilePath) as inputFile:
            for line in inputFile:
                line = line.strip()
                if not line:
                    # Blank lines (for example a trailing newline) carry no record.
                    continue

                fields = line.split("|")
                if len(fields) != 2:
                    print(f"Skipping malformed line: {line!r}")
                    continue

                uniqueRowNumb, emailAddr = fields
                person_to_search = emailAddr.strip()

                # Search each address once; a repeat would only redo the same
                # search and rewrite the same file.
                if not person_to_search or person_to_search in listOfEmailAddresses:
                    continue
                listOfEmailAddresses.add(person_to_search)

                search_and_download_vcards(person_to_search, element_id_name, protectedDownload=True)
    finally:
        # Always close the browser, even if reading the input file fails.
        driver.quit()