import html
import os
import re

import requests
from bs4 import BeautifulSoup

# Timeout (seconds) for every outbound HTTP request so a stalled server
# cannot hang the script forever.
REQUEST_TIMEOUT = 30

# Output locations for the downloaded images and the generated page.
# Kept as literal forward-slash paths to match the original behavior.
IMAGES_DIR = 'web/content/research/images'
OUTPUT_HTML = 'web/content/research/publications.html'


def ensure_url_scheme(url):
    """Return *url* with an explicit scheme, defaulting to https.

    Scheme-less URLs (e.g. '//example.org/x' or 'example.org/x') are
    prefixed with 'https://'; already-qualified URLs pass through unchanged.
    """
    if not url.startswith(('http://', 'https://')):
        return 'https://' + url.lstrip('/')
    return url


def get_publications_from_hal(author_id):
    """Fetch the publication records for *author_id* from the HAL API.

    Returns the list of publication dicts on success, or None when the
    request fails (an error message is printed in that case).
    """
    # HAL search API: request only the fields the HTML generator needs.
    url = (
        "https://api.archives-ouvertes.fr/search/"
        f"?q=authIdHal_s:{author_id}&wt=json"
        "&fl=title_s,authFullName_s,producedDateY_i,uri_s,"
        "conferenceTitle_s,journalTitle_s,doiId_s,docType_s"
    )
    response = requests.get(url, timeout=REQUEST_TIMEOUT)
    if response.status_code == 200:
        return response.json()['response']['docs']
    print(f"Erreur : Impossible de récupérer les données de l'API de HAL. Code de statut : {response.status_code}")
    return None


def download_image(image_url, image_path):
    """Download *image_url* to *image_path*; return True on success."""
    image_url = ensure_url_scheme(image_url)
    response = requests.get(image_url, timeout=REQUEST_TIMEOUT)
    if response.status_code == 200:
        with open(image_path, 'wb') as f:
            f.write(response.content)
        return True
    return False


def _fetch_publication_image(uri, title):
    """Best-effort: download the first <img> found on the HAL page at *uri*.

    A network or parse failure is swallowed — a missing thumbnail must not
    abort the whole page build.
    """
    try:
        page = requests.get(uri, timeout=REQUEST_TIMEOUT)
        soup = BeautifulSoup(page.content, 'html.parser')
        img_tag = soup.find('img')
        if img_tag and 'src' in img_tag.attrs:
            # Build a filesystem-safe filename from the title (same character
            # set the original replaced one-by-one with chained .replace()).
            safe_title = re.sub(r'[ /\\:*?"<>|]', '_', title)
            download_image(img_tag['src'], f"{IMAGES_DIR}/{safe_title}.jpg")
    except requests.RequestException:
        pass


def create_html(publications):
    """Generate the publications HTML page from HAL records.

    Publications are grouped into per-year sections, newest first; the list
    passed in is sorted in place. Thumbnails are downloaded to IMAGES_DIR
    (NOTE(review): the downloaded images are not yet referenced by the
    generated markup — presumably intended for a later template change).
    """
    # Bug fix: the original created a stray top-level 'images' folder but
    # wrote images into web/content/research/images, which never existed.
    os.makedirs(IMAGES_DIR, exist_ok=True)

    # Newest first; records without a year sort last (default 0).
    publications.sort(key=lambda p: p.get('producedDateY_i', 0), reverse=True)

    html_content = """
    <!DOCTYPE html>
    <html lang="fr">
    <head>
        <meta charset="UTF-8">
        <title>Publications</title>
        <style>
            ul { list-style-type: disc; padding-left: 20px; }
            .year-section { margin-top: 20px; }
            .publication { margin-bottom: 10px; }
            img { max-width: 100px; height: auto; margin-right: 10px; }
            p { margin: 0; }
        </style>
    </head>
    <body>
        <h1>Liste des Publications</h1>
    """

    current_year = None
    for pub in publications:
        titles = pub.get('title_s', [])
        title = titles[0] if titles else ''
        authors = ', '.join(pub.get('authFullName_s', []))
        year = pub.get('producedDateY_i', '')
        uri = pub.get('uri_s', '')
        conference = pub.get('conferenceTitle_s', '')
        journal = pub.get('journalTitle_s', '')
        doi = pub.get('doiId_s', '')

        if uri:
            _fetch_publication_image(uri, title)

        # Open a new year section whenever the year changes (list is sorted).
        if year != current_year:
            if current_year is not None:
                html_content += "</ul></div>"
            current_year = year
            html_content += f"<div class='year-section'><h2>{year}</h2><ul>"

        # Escape metadata before embedding it in markup: titles and author
        # names routinely contain '&', '<' etc.
        doi_link = (
            f'<a href="https://doi.org/{html.escape(doi)}">, Lien DOI</a>'
            if doi else ''
        )
        html_content += f"""
        <li class="publication">
            <p><strong>{html.escape(title)}</strong></p>
            <p>{html.escape(authors)}</p>
            <p>{html.escape(conference or journal)}, {year}</p>
            <p><a href="{html.escape(uri)}">Lien vers HAL</a> {doi_link}</p>
        </li>
        """

    # Close the last year section only if one was actually opened (an empty
    # publication list previously produced dangling </ul></div>).
    if current_year is not None:
        html_content += "</ul></div>"
    html_content += """
    </body>
    </html>
    """

    os.makedirs(os.path.dirname(OUTPUT_HTML), exist_ok=True)
    with open(OUTPUT_HTML, 'w', encoding='utf-8') as f:
        f.write(html_content)


if __name__ == '__main__':
    # Example usage: build the page for one HAL author id.
    author_id = "alexandre-meyer"
    publications = get_publications_from_hal(author_id)
    if publications:
        create_html(publications)