import os
import requests

def get_hashes_from_url(url, timeout=30):
    """Fetch a hash list file from *url* and return its hashes.

    The response body is read as text, one entry per line. Surrounding
    whitespace is stripped, and blank lines as well as lines starting with
    ``#`` (header/comment lines) are dropped so that only hash values remain.

    Args:
        url: Address of the hash list file to download.
        timeout: Seconds to wait for the server before giving up. Without a
            timeout ``requests`` waits indefinitely, so a single stalled
            connection would hang the whole run.

    Returns:
        A list of hash strings in file order, or an empty list when the
        request fails (the error is printed, not raised).
    """
    # Keep the try body limited to the calls that can raise a request error.
    try:
        response = requests.get(url, timeout=timeout)
        response.raise_for_status()
    except requests.exceptions.RequestException as e:
        print(f"Error accessing {url}: {e}")
        return []

    stripped = (line.strip() for line in response.text.splitlines())
    # An MD5 hex digest never starts with '#', so this only removes comments.
    return [line for line in stripped if line and not line.startswith("#")]

def scrape_all_hash_files(base_url, start_page, end_page):
    """Collect the hashes from every numbered hash file in a page range.

    For each page number from ``start_page`` through ``end_page`` (both
    inclusive) the file URL is built as ``base_url`` followed by the
    zero-padded, five-digit page number and the ``.md5`` suffix. Progress is
    printed for every page; pages that yield nothing are reported and skipped.

    Returns a single flat list holding the hashes in page order.
    """
    collected = []
    stop = end_page + 1  # range() excludes its upper bound

    for number in range(start_page, stop):
        file_name = f"{number:05}.md5"
        target = f"{base_url}{file_name}"
        print(f"Scraping page {number}: {target}")

        found = get_hashes_from_url(target)
        if not found:
            # An empty result covers both an empty file and a failed download.
            print(f"No hashes found or error on page {number}.")
            continue

        collected.extend(found)

    return collected

def write_hashes_to_file(hashes, output_file="hash2.txt"):
    """Write *hashes* to *output_file* as a comma-separated list of quoted strings.

    Each hash is wrapped in double quotes and entries are joined with ", ",
    e.g. ``"abc", "def"``. No trailing newline is written, and an empty
    input produces an empty file. An existing file is overwritten.

    Args:
        hashes: Iterable of hash strings to write.
        output_file: Path of the file to create or overwrite.
    """
    # Explicit encoding keeps the output identical across platforms instead
    # of depending on the locale's default.
    with open(output_file, "w", encoding="utf-8") as f:
        # 'digest' rather than 'hash' so the builtin hash() is not shadowed.
        f.write(", ".join(f'"{digest}"' for digest in hashes))

if __name__ == "__main__":
    # URL prefix shared by all hash files; the page number and ".md5" suffix
    # are appended by scrape_all_hash_files.
    base_url = "https://virusshare.com/hashfiles/VirusShare_"
    # Inclusive range of page numbers to download.
    start_page = 0
    end_page = 486
    # Single source of truth for the output path, so the success message
    # cannot drift from the file that is actually written.
    output_file = "hash2.txt"

    all_hashes = scrape_all_hash_files(base_url, start_page, end_page)

    if all_hashes:
        write_hashes_to_file(all_hashes, output_file)
        print(f"Hash values have been written to {output_file}")
    else:
        print("No hashes found or error in scraping.")
