Skip to content
Snippets Groups Projects
extract_html.py 4.98 KiB
Newer Older
  • Learn to ignore specific revisions
  • Alexandre MEYER's avatar
    Alexandre MEYER committed
    import os
    
    Alexandre MEYER's avatar
    Alexandre MEYER committed
    import requests
    from bs4 import BeautifulSoup
    
    
    Alexandre MEYER's avatar
    Alexandre MEYER committed
    def ensure_url_scheme(url):
        """Return ``url`` with an explicit scheme, defaulting to HTTPS.

        A URL that already begins with ``http://`` or ``https://`` is returned
        unchanged. Anything else has its leading slashes removed and
        ``https://`` prepended, so a protocol-relative ``//host/path`` becomes
        ``https://host/path``.
        """
        has_scheme = url.startswith('http://') or url.startswith('https://')
        return url if has_scheme else 'https://' + url.lstrip('/')
    
    
    Alexandre MEYER's avatar
    Alexandre MEYER committed
    def get_publications_from_hal(author_id, rows=1000, timeout=30):
        """Fetch the publication records of one author from the HAL search API.

        Args:
            author_id: HAL author identifier, matched against ``authIdHal_s``.
            rows: Maximum number of records to request. It is sent explicitly
                because the API otherwise applies its own default page size
                (30 according to the HAL API documentation), which would
                silently truncate longer publication lists.
            timeout: Seconds to wait for the HTTP response before giving up.

        Returns:
            The list found under ``response.docs`` in the JSON reply, or
            ``None`` when the request fails, the status code is not 200, or
            the reply does not have the expected shape. A message is printed
            in each failure case.
        """
        url = "https://api.archives-ouvertes.fr/search/"
        # Passing the query as ``params`` lets requests URL-encode author_id.
        params = {
            'q': f"authIdHal_s:{author_id}",
            'wt': 'json',
            'fl': "title_s,authFullName_s,producedDateY_i,uri_s,conferenceTitle_s,journalTitle_s,doiId_s,docType_s",
            'rows': rows,
        }

        try:
            response = requests.get(url, params=params, timeout=timeout)
        except requests.RequestException as exc:
            print(f"Erreur : Impossible de récupérer les données de l'API de HAL. {exc}")
            return None

        if response.status_code != 200:
            print(f"Erreur : Impossible de récupérer les données de l'API de HAL. Code de statut : {response.status_code}")
            return None

        try:
            return response.json()['response']['docs']
        except (ValueError, KeyError, TypeError) as exc:
            # ValueError covers a body that is not JSON; KeyError/TypeError
            # cover a JSON reply without the response/docs structure.
            print(f"Erreur : Réponse inattendue de l'API de HAL. {exc}")
            return None
    
    def download_image(image_url, image_path, timeout=30):
        """Download an image and store it at ``image_path``.

        Args:
            image_url: Address of the image; it is passed through
                ``ensure_url_scheme`` before the request is made.
            image_path: Destination file. Its parent directory is created
                when it does not exist yet.
            timeout: Seconds to wait for the HTTP response before giving up.

        Returns:
            ``True`` when the image was fetched with status 200 and written
            to disk, ``False`` otherwise (request error, other status code,
            or failure to write the file).
        """
        image_url = ensure_url_scheme(image_url)

        try:
            response = requests.get(image_url, timeout=timeout)
        except requests.RequestException:
            return False
        if response.status_code != 200:
            return False

        try:
            target_dir = os.path.dirname(image_path)
            if target_dir:
                os.makedirs(target_dir, exist_ok=True)
            with open(image_path, 'wb') as f:
                f.write(response.content)
        except OSError:
            # The boolean return is this function's failure channel, so a
            # write problem is reported the same way as a failed request.
            return False
        return True
    
    def _download_publication_image(uri, title, images_dir):
        """Best-effort download of the first ``<img>`` found on a publication page.

        The image is saved as ``<images_dir>/<sanitized title>.jpg``. Any
        network or file-system problem is reported with a printed warning and
        otherwise ignored, so one bad page cannot abort the page generation.
        """
        from urllib.parse import urljoin

        try:
            page = requests.get(uri, timeout=30)
            img_tag = BeautifulSoup(page.content, 'html.parser').find('img')
            if not img_tag or 'src' not in img_tag.attrs:
                return

            # Resolve relative and protocol-relative sources against the page.
            image_url = urljoin(uri, img_tag['src'])
            if not image_url.startswith(('http://', 'https://')):
                return  # e.g. inline "data:" images cannot be downloaded

            # Replace spaces and characters that are unsafe in file names.
            unsafe = str.maketrans({ch: '_' for ch in ' /\\:*?"<>|'})
            safe_title = title.translate(unsafe)
            download_image(image_url, f"{images_dir}/{safe_title}.jpg")
        except (requests.RequestException, OSError) as exc:
            print(f"Avertissement : image non récupérée pour {uri} ({exc})")


    def create_html(publications):
        """Write ``web/content/research/publications.html`` from HAL records.

        Publications are listed newest first and grouped into one section per
        year. For every record that has a ``uri_s``, the first image of that
        page is downloaded into ``web/content/research/images`` (best effort).

        Args:
            publications: Iterable of dicts as returned by the HAL API. The
                keys read are ``title_s`` (list), ``authFullName_s`` (list),
                ``producedDateY_i``, ``uri_s``, ``conferenceTitle_s``,
                ``journalTitle_s`` and ``doiId_s``; all are optional. The
                argument itself is not modified.
        """
        import html

        output_dir = 'web/content/research'
        images_dir = f"{output_dir}/images"
        # Create the directory the images are actually written to; this also
        # guarantees that the output directory of the HTML page exists.
        os.makedirs(images_dir, exist_ok=True)

        # Newest first; records without a year sort last.
        ordered = sorted(
            publications,
            key=lambda pub: pub.get('producedDateY_i', 0),
            reverse=True,
        )

        parts = ["""
        <!DOCTYPE html>
        <html lang="fr">
        <head>
            <meta charset="UTF-8">
            <title>Publications</title>
            <style>
                ul {
                    list-style-type: disc;
                    padding-left: 20px;
                }
                .year-section {
                    margin-top: 20px;
                }
                .publication {
                    margin-bottom: 10px;
                }
                img {
                    max-width: 100px;
                    height: auto;
                    margin-right: 10px;
                }
                p {
                    margin: 0;
                }
            </style>
        </head>
        <body>
            <h1>Liste des Publications</h1>
        """]

        current_year = None
        section_open = False

        for pub in ordered:
            title_list = pub.get('title_s', [])
            title = title_list[0] if title_list else ''
            authors = ', '.join(pub.get('authFullName_s', []))
            year = pub.get('producedDateY_i', '')
            uri = pub.get('uri_s', '')
            venue = pub.get('conferenceTitle_s', '') or pub.get('journalTitle_s', '')
            doi = pub.get('doiId_s', '')

            if uri:
                _download_publication_image(uri, title, images_dir)

            # Start a new section whenever the year changes.
            if not section_open or year != current_year:
                if section_open:
                    parts.append("</ul></div>")
                current_year = year
                section_open = True
                parts.append(
                    f"<div class='year-section'><h2>{html.escape(str(year))}</h2><ul>"
                )

            # Join only the parts that exist, so a missing venue does not
            # leave a dangling ", " in front of the year.
            venue_line = ', '.join(str(part) for part in (venue, year) if part)
            doi_link = (
                f'<a href="https://doi.org/{html.escape(doi)}">, Lien DOI</a>'
                if doi else ''
            )

            # All API-provided text is escaped before it is placed in markup.
            parts.append(f"""
                <li class="publication">
                    <p><strong>{html.escape(title)}</strong></p>
                    <p>{html.escape(authors)}</p>
                    <p>{html.escape(venue_line)}</p>
                    <p><a href="{html.escape(uri)}">Lien vers HAL</a> {doi_link}</p>
                </li>
            """)

        # Close the last year section only if one was opened.
        if section_open:
            parts.append("""
            </ul></div>
            """)

        parts.append("""
        </body>
        </html>
        """)

        with open(f"{output_dir}/publications.html", 'w', encoding='utf-8') as f:
            f.write(''.join(parts))
    
    # Example usage: build the publications page for one HAL author.
    author_id = "alexandre-meyer"
    publications = get_publications_from_hal(author_id)

    # Nothing is generated when the fetch failed (None) or returned no records.
    if publications:
        create_html(publications)