"""Generate a static HTML publication list from the HAL open-archive API."""

import html
import os
from urllib.parse import urljoin

import requests
from bs4 import BeautifulSoup

HAL_API_URL = "https://api.archives-ouvertes.fr/search/"
# Fields requested from the API (the `fl` query parameter).
HAL_FIELDS = (
    "title_s",
    "authFullName_s",
    "producedDateY_i",
    "uri_s",
    "conferenceTitle_s",
    "journalTitle_s",
    "doiId_s",
    "docType_s",
    "software_s",
)

OUTPUT_DIR = "web/content/research"
IMAGE_DIR = f"{OUTPUT_DIR}/images"
OUTPUT_FILE = f"{OUTPUT_DIR}/publications.html"

# Seconds to wait on any single HTTP request before giving up.
REQUEST_TIMEOUT = 30

# Labels for the document types that are mapped to a fixed text.
_DOC_TYPE_LABELS = {
    "HDR": "HDR",
    "REPORT": "Research Report",
    "THESE": "Thesis",
    "UNDEFINED": "Pre-publication",
    "COUV": "Book chapter",
    "SOFTWARE": "Software",
}

# Characters replaced by "_" when a title is turned into a file name.
_UNSAFE_FILENAME_CHARS = str.maketrans({char: "_" for char in ' /\\:*?"<>|'})

# Marks "no year section opened yet"; distinct from any value a record holds.
_NO_YEAR = object()

_HTML_HEAD = """<!DOCTYPE html>
<html lang="fr">
<head>
    <meta charset="UTF-8">
    <title>Publications</title>
    <style>
        ul {
            list-style-type: disc;
            padding-left: 20px;
        }
        .custom-year-section {
            margin-top: 20px;
        }
        .custom-publication {
            margin-bottom: 0px;
            margin: 0;
        }
        img {
            max-width: 100px;
            height: auto;
            margin-right: 10px;
        }
        custom-p {
            margin: 0;
        }
    </style>
</head>
<body>
    <h1>Publications List</h1>
"""

_HTML_TAIL = """
</body>
</html>
"""


def ensure_url_scheme(url):
    """Return *url* unchanged if it starts with http:// or https://.

    Otherwise strip any leading slashes and prefix ``https://``, which turns
    protocol-relative URLs such as ``//host/path`` into ``https://host/path``.
    """
    if not url.startswith(('http://', 'https://')):
        return 'https://' + url.lstrip('/')
    return url


def get_publications_from_hal(author_id, rows=50):
    """Fetch the publication records of one author from the HAL search API.

    Args:
        author_id: Value matched against the ``authIdHal_s`` field.
        rows: Maximum number of records requested (``rows`` query parameter).

    Returns:
        The list found under ``response.docs`` in the JSON answer, or ``None``
        when the request fails or the API answers with a status other than
        200. An error message is printed in both failure cases.
    """
    params = {
        "q": f"authIdHal_s:{author_id}",
        "wt": "json",
        "fl": ",".join(HAL_FIELDS),
        "start": 0,
        "rows": rows,
    }
    try:
        response = requests.get(
            HAL_API_URL, params=params, timeout=REQUEST_TIMEOUT
        )
    except requests.RequestException as exc:
        print(f"Erreur : Impossible de contacter l'API de HAL : {exc}")
        return None

    if response.status_code != 200:
        print(f"Erreur : Impossible de récupérer les données de l'API de HAL. Code de statut : {response.status_code}")
        return None

    return response.json()['response']['docs']


def download_image(image_url, image_path):
    """Download *image_url* and write the response body to *image_path*.

    Returns:
        ``True`` when the server answered 200 and the file was written,
        ``False`` when the request failed or returned another status.
    """
    image_url = ensure_url_scheme(image_url)
    try:
        response = requests.get(image_url, timeout=REQUEST_TIMEOUT)
    except requests.RequestException:
        return False

    if response.status_code != 200:
        return False

    with open(image_path, 'wb') as f:
        f.write(response.content)
    return True


def _esc(value):
    """Return *value* as text that is safe inside HTML content and attributes."""
    return html.escape(str(value))


def _first_title(pub):
    """Return the first entry of the record's ``title_s`` list, or ''."""
    titles = pub.get('title_s', [])
    return titles[0] if titles else ''


def _safe_filename(title):
    """Replace spaces and characters that are unsafe in file names by '_'."""
    return title.translate(_UNSAFE_FILENAME_CHARS)


def _publication_type(doc_type, conference, journal, title):
    """Return the label shown next to the year for one publication.

    Known document types map to a fixed label; otherwise the conference title
    is used, then the journal title, then "Other". Records classified as
    'UNDEFINED' or falling through to "Other" are printed so they can be
    checked by hand.
    """
    label = _DOC_TYPE_LABELS.get(doc_type)
    if label is None:
        label = conference or journal or "Other"
    if doc_type == 'UNDEFINED' or label == "Other":
        print(doc_type + " title=" + title)
    return label


def _fetch_publication_image(uri, title):
    """Save the first <img> of the page at *uri* under IMAGE_DIR (best effort).

    Returns the written path, or '' if the page could not be fetched, has no
    usable <img>, or the image download failed.
    """
    try:
        page = requests.get(uri, timeout=REQUEST_TIMEOUT)
    except requests.RequestException:
        return ''

    img_tag = BeautifulSoup(page.content, 'html.parser').find('img')
    if not img_tag or 'src' not in img_tag.attrs:
        return ''

    # Resolve relative and protocol-relative sources against the page URL.
    image_url = urljoin(uri, img_tag['src'])
    image_path = f"{IMAGE_DIR}/{_safe_filename(title)}.jpg"
    return image_path if download_image(image_url, image_path) else ''


def _render_html(publications):
    """Return the full HTML page for *publications*, grouped by year.

    *publications* must already be ordered; a new year section is opened each
    time the year differs from the previous record's year.
    """
    parts = [_HTML_HEAD]
    current_year = _NO_YEAR

    for pub in publications:
        title = _first_title(pub)
        authors = ', '.join(pub.get('authFullName_s', []))
        year = pub.get('producedDateY_i', '')
        uri = pub.get('uri_s', '')
        doi = pub.get('doiId_s', '')
        pubtype = _publication_type(
            pub.get('docType_s', ''),
            pub.get('conferenceTitle_s', ''),
            pub.get('journalTitle_s', ''),
            title,
        )

        # Open a new section whenever the year changes.
        if year != current_year:
            if current_year is not _NO_YEAR:
                parts.append("</ul></div>")
            current_year = year
            parts.append(
                f"<div class='custom-year-section'><h2>{_esc(year)}</h2><ul>"
            )

        doi_link = (
            f'<a href="https://doi.org/{_esc(doi)}">, Lien DOI</a>'
            if doi else ''
        )
        parts.append(
            '\n<li class="custom-publication">\n'
            f'    <custom-p><strong>{_esc(title)}</strong></custom-p>\n'
            f'    <custom-p>{_esc(authors)}</custom-p>\n'
            f'    <custom-p>{_esc(pubtype)}, {_esc(year)}</custom-p>\n'
            f'    <custom-p><a href="{_esc(uri)}">Lien vers HAL</a> {doi_link}</custom-p>\n'
            '</li>\n'
        )

    # Close the last section only if one was opened (empty input opens none).
    if current_year is not _NO_YEAR:
        parts.append("</ul></div>")
    parts.append(_HTML_TAIL)
    return ''.join(parts)


def create_html(publications):
    """Write the publication list to OUTPUT_FILE and fetch page images.

    Records are ordered from newest to oldest by ``producedDateY_i`` (the
    caller's list is left untouched). For every record with a ``uri_s``, the
    first image of that page is saved under IMAGE_DIR on a best-effort basis;
    the downloaded images are not referenced from the generated HTML.

    Args:
        publications: List of record dicts as returned by
            ``get_publications_from_hal``.
    """
    # Creates OUTPUT_DIR as well, since it is the parent of IMAGE_DIR.
    os.makedirs(IMAGE_DIR, exist_ok=True)

    ordered = sorted(
        publications,
        key=lambda pub: pub.get('producedDateY_i', 0),
        reverse=True,
    )

    for pub in ordered:
        uri = pub.get('uri_s', '')
        if uri:
            _fetch_publication_image(uri, _first_title(pub))

    with open(OUTPUT_FILE, 'w', encoding='utf-8') as f:
        f.write(_render_html(ordered))


# Example usage
if __name__ == "__main__":
    author_id = "alexandre-meyer"
    publications = get_publications_from_hal(author_id)

    if publications:
        create_html(publications)